sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], reverse_args: bool = False 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 def _parse_binary_range( 47 self: Parser, this: t.Optional[exp.Expression] 48 ) -> t.Optional[exp.Expression]: 49 expression = self._parse_bitwise() 50 if reverse_args: 51 this, expression = expression, this 52 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 53 54 return _parse_binary_range 55 56 57def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 58 # Default argument order is base, expression 59 this = seq_get(args, 0) 60 expression = seq_get(args, 1) 61 62 if expression: 63 if not dialect.LOG_BASE_FIRST: 64 this, expression = expression, this 65 return exp.Log(this=this, expression=expression) 66 67 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 68 69 70def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 71 arg = seq_get(args, 0) 72 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 73 74 75def build_lower(args: t.List) -> exp.Lower | exp.Hex: 76 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 79 80 81def build_upper(args: t.List) -> exp.Upper | exp.Hex: 82 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 85 86 87def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 88 def _builder(args: t.List, dialect: Dialect) -> E: 89 expression = expr_type( 90 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 91 ) 92 if len(args) > 2 and expr_type is exp.JSONExtract: 93 expression.set("expressions", args[2:]) 94 95 return expression 96 97 return _builder 98 99 100def build_mod(args: t.List) -> exp.Mod: 101 this = seq_get(args, 0) 102 expression = seq_get(args, 1) 103 104 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 105 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 106 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 107 108 return exp.Mod(this=this, expression=expression) 109 110 111def build_pad(args: t.List, is_left: bool = True): 112 return exp.Pad( 113 this=seq_get(args, 0), 114 expression=seq_get(args, 1), 115 fill_pattern=seq_get(args, 2), 116 is_left=is_left, 117 ) 118 119 120def build_array_constructor( 121 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 122) -> exp.Expression: 123 array_exp = exp_class(expressions=args) 124 125 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 126 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 127 128 return array_exp 129 130 131def build_convert_timezone( 132 args: t.List, default_source_tz: t.Optional[str] = None 133) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 134 if len(args) == 2: 135 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 136 return exp.ConvertTimezone( 137 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 138 ) 139 140 return exp.ConvertTimezone.from_arg_list(args) 141 142 143def build_trim(args: t.List, is_left: bool = True): 144 return exp.Trim( 145 this=seq_get(args, 0), 146 expression=seq_get(args, 1), 147 position="LEADING" if is_left else "TRAILING", 148 ) 149 150 151def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce: 152 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl) 153 154 155class _Parser(type): 156 def __new__(cls, clsname, bases, attrs): 157 klass = super().__new__(cls, clsname, bases, attrs) 158 159 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 160 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 161 162 return klass 163 164 165class Parser(metaclass=_Parser): 166 """ 167 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 168 169 Args: 170 error_level: The desired error level. 171 Default: ErrorLevel.IMMEDIATE 172 error_message_context: The amount of context to capture from a query string when displaying 173 the error message (in number of characters). 174 Default: 100 175 max_errors: Maximum number of error messages to include in a raised ParseError. 176 This is only relevant if error_level is ErrorLevel.RAISE. 177 Default: 3 178 """ 179 180 FUNCTIONS: t.Dict[str, t.Callable] = { 181 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 182 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 183 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 184 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 185 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 186 ), 187 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 188 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 189 ), 190 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 191 "CONCAT": lambda args, dialect: exp.Concat( 192 expressions=args, 193 safe=not dialect.STRICT_STRING_CONCAT, 194 coalesce=dialect.CONCAT_COALESCE, 195 ), 196 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 197 expressions=args, 198 safe=not dialect.STRICT_STRING_CONCAT, 199 coalesce=dialect.CONCAT_COALESCE, 200 ), 201 "CONVERT_TIMEZONE": build_convert_timezone, 202 "DATE_TO_DATE_STR": lambda args: exp.Cast( 203 this=seq_get(args, 0), 204 to=exp.DataType(this=exp.DataType.Type.TEXT), 205 ), 206 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 207 start=seq_get(args, 0), 208 end=seq_get(args, 1), 209 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 210 ), 211 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 212 "HEX": build_hex, 213 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 214 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 215 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 216 "LIKE": build_like, 217 "LOG": build_logarithm, 218 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 219 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 220 "LOWER": build_lower, 221 "LPAD": lambda args: build_pad(args), 222 "LEFTPAD": lambda args: build_pad(args), 223 "LTRIM": lambda args: build_trim(args), 224 "MOD": build_mod, 225 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 226 "RPAD": lambda args: build_pad(args, is_left=False), 227 "RTRIM": lambda args: build_trim(args, is_left=False), 228 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 229 if len(args) != 2 230 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 231 "TIME_TO_TIME_STR": lambda args: exp.Cast( 232 this=seq_get(args, 0), 233 to=exp.DataType(this=exp.DataType.Type.TEXT), 234 ), 235 "TO_HEX": build_hex, 236 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 237 this=exp.Cast( 238 this=seq_get(args, 0), 239 to=exp.DataType(this=exp.DataType.Type.TEXT), 240 ), 241 start=exp.Literal.number(1), 242 length=exp.Literal.number(10), 243 ), 244 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 245 "UPPER": build_upper, 246 "VAR_MAP": build_var_map, 247 } 248 249 NO_PAREN_FUNCTIONS = { 250 TokenType.CURRENT_DATE: exp.CurrentDate, 251 TokenType.CURRENT_DATETIME: exp.CurrentDate, 252 TokenType.CURRENT_TIME: exp.CurrentTime, 253 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 254 TokenType.CURRENT_USER: exp.CurrentUser, 255 } 256 257 STRUCT_TYPE_TOKENS = { 258 TokenType.NESTED, 259 TokenType.OBJECT, 260 TokenType.STRUCT, 261 } 262 263 NESTED_TYPE_TOKENS = { 264 TokenType.ARRAY, 265 TokenType.LIST, 266 TokenType.LOWCARDINALITY, 267 TokenType.MAP, 268 TokenType.NULLABLE, 269 *STRUCT_TYPE_TOKENS, 270 } 271 272 ENUM_TYPE_TOKENS = { 273 TokenType.ENUM, 274 TokenType.ENUM8, 275 TokenType.ENUM16, 276 } 277 278 AGGREGATE_TYPE_TOKENS = { 279 TokenType.AGGREGATEFUNCTION, 280 TokenType.SIMPLEAGGREGATEFUNCTION, 281 } 282 283 TYPE_TOKENS = { 284 TokenType.BIT, 285 TokenType.BOOLEAN, 286 TokenType.TINYINT, 287 TokenType.UTINYINT, 288 TokenType.SMALLINT, 289 TokenType.USMALLINT, 290 TokenType.INT, 291 TokenType.UINT, 292 TokenType.BIGINT, 293 TokenType.UBIGINT, 294 TokenType.INT128, 295 TokenType.UINT128, 296 TokenType.INT256, 297 TokenType.UINT256, 298 TokenType.MEDIUMINT, 299 TokenType.UMEDIUMINT, 300 TokenType.FIXEDSTRING, 301 TokenType.FLOAT, 302 TokenType.DOUBLE, 303 TokenType.CHAR, 304 TokenType.NCHAR, 305 TokenType.VARCHAR, 306 TokenType.NVARCHAR, 307 TokenType.BPCHAR, 308 TokenType.TEXT, 309 TokenType.MEDIUMTEXT, 310 TokenType.LONGTEXT, 311 TokenType.MEDIUMBLOB, 312 TokenType.LONGBLOB, 313 TokenType.BINARY, 314 TokenType.VARBINARY, 315 TokenType.JSON, 316 TokenType.JSONB, 317 TokenType.INTERVAL, 318 TokenType.TINYBLOB, 319 TokenType.TINYTEXT, 320 TokenType.TIME, 321 TokenType.TIMETZ, 322 TokenType.TIMESTAMP, 323 TokenType.TIMESTAMP_S, 324 TokenType.TIMESTAMP_MS, 325 TokenType.TIMESTAMP_NS, 326 TokenType.TIMESTAMPTZ, 327 TokenType.TIMESTAMPLTZ, 328 TokenType.TIMESTAMPNTZ, 329 TokenType.DATETIME, 330 TokenType.DATETIME64, 331 TokenType.DATE, 332 TokenType.DATE32, 333 TokenType.INT4RANGE, 334 TokenType.INT4MULTIRANGE, 335 TokenType.INT8RANGE, 336 TokenType.INT8MULTIRANGE, 337 TokenType.NUMRANGE, 338 TokenType.NUMMULTIRANGE, 339 TokenType.TSRANGE, 340 TokenType.TSMULTIRANGE, 341 TokenType.TSTZRANGE, 342 TokenType.TSTZMULTIRANGE, 343 TokenType.DATERANGE, 344 TokenType.DATEMULTIRANGE, 345 TokenType.DECIMAL, 346 TokenType.DECIMAL32, 347 TokenType.DECIMAL64, 348 TokenType.DECIMAL128, 349 TokenType.UDECIMAL, 350 TokenType.BIGDECIMAL, 351 TokenType.UUID, 352 TokenType.GEOGRAPHY, 353 TokenType.GEOMETRY, 354 TokenType.HLLSKETCH, 355 TokenType.HSTORE, 356 TokenType.PSEUDO_TYPE, 357 TokenType.SUPER, 358 TokenType.SERIAL, 359 TokenType.SMALLSERIAL, 360 TokenType.BIGSERIAL, 361 TokenType.XML, 362 TokenType.YEAR, 363 TokenType.UNIQUEIDENTIFIER, 364 TokenType.USERDEFINED, 365 TokenType.MONEY, 366 TokenType.SMALLMONEY, 367 TokenType.ROWVERSION, 368 TokenType.IMAGE, 369 TokenType.VARIANT, 370 TokenType.VECTOR, 371 TokenType.OBJECT, 372 TokenType.OBJECT_IDENTIFIER, 373 TokenType.INET, 374 TokenType.IPADDRESS, 375 TokenType.IPPREFIX, 376 TokenType.IPV4, 377 TokenType.IPV6, 378 TokenType.UNKNOWN, 379 TokenType.NULL, 380 TokenType.NAME, 381 TokenType.TDIGEST, 382 *ENUM_TYPE_TOKENS, 383 *NESTED_TYPE_TOKENS, 384 *AGGREGATE_TYPE_TOKENS, 385 } 386 387 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 388 TokenType.BIGINT: TokenType.UBIGINT, 389 TokenType.INT: TokenType.UINT, 390 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 391 TokenType.SMALLINT: TokenType.USMALLINT, 392 TokenType.TINYINT: TokenType.UTINYINT, 393 TokenType.DECIMAL: TokenType.UDECIMAL, 394 } 395 396 SUBQUERY_PREDICATES = { 397 TokenType.ANY: exp.Any, 398 TokenType.ALL: exp.All, 399 TokenType.EXISTS: exp.Exists, 400 TokenType.SOME: exp.Any, 401 } 402 403 RESERVED_TOKENS = { 404 *Tokenizer.SINGLE_TOKENS.values(), 405 TokenType.SELECT, 406 } - {TokenType.IDENTIFIER} 407 408 DB_CREATABLES = { 409 TokenType.DATABASE, 410 TokenType.DICTIONARY, 411 TokenType.MODEL, 412 TokenType.SCHEMA, 413 TokenType.SEQUENCE, 414 TokenType.STORAGE_INTEGRATION, 415 TokenType.TABLE, 416 TokenType.TAG, 417 TokenType.VIEW, 418 TokenType.WAREHOUSE, 419 TokenType.STREAMLIT, 420 } 421 422 CREATABLES = { 423 TokenType.COLUMN, 424 TokenType.CONSTRAINT, 425 TokenType.FOREIGN_KEY, 426 TokenType.FUNCTION, 427 TokenType.INDEX, 428 TokenType.PROCEDURE, 429 *DB_CREATABLES, 430 } 431 432 ALTERABLES = { 433 TokenType.TABLE, 434 TokenType.VIEW, 435 } 436 437 # Tokens that can represent identifiers 438 ID_VAR_TOKENS = { 439 TokenType.ALL, 440 TokenType.VAR, 441 TokenType.ANTI, 442 TokenType.APPLY, 443 TokenType.ASC, 444 TokenType.ASOF, 445 TokenType.AUTO_INCREMENT, 446 TokenType.BEGIN, 447 TokenType.BPCHAR, 448 TokenType.CACHE, 449 TokenType.CASE, 450 TokenType.COLLATE, 451 TokenType.COMMAND, 452 TokenType.COMMENT, 453 TokenType.COMMIT, 454 TokenType.CONSTRAINT, 455 TokenType.COPY, 456 TokenType.CUBE, 457 TokenType.DEFAULT, 458 TokenType.DELETE, 459 TokenType.DESC, 460 TokenType.DESCRIBE, 461 TokenType.DICTIONARY, 462 TokenType.DIV, 463 TokenType.END, 464 TokenType.EXECUTE, 465 TokenType.ESCAPE, 466 TokenType.FALSE, 467 TokenType.FIRST, 468 TokenType.FILTER, 469 TokenType.FINAL, 470 TokenType.FORMAT, 471 TokenType.FULL, 472 TokenType.IDENTIFIER, 473 TokenType.IS, 474 TokenType.ISNULL, 475 TokenType.INTERVAL, 476 TokenType.KEEP, 477 TokenType.KILL, 478 TokenType.LEFT, 479 TokenType.LOAD, 480 TokenType.MERGE, 481 TokenType.NATURAL, 482 TokenType.NEXT, 483 TokenType.OFFSET, 484 TokenType.OPERATOR, 485 TokenType.ORDINALITY, 486 TokenType.OVERLAPS, 487 TokenType.OVERWRITE, 488 TokenType.PARTITION, 489 TokenType.PERCENT, 490 TokenType.PIVOT, 491 TokenType.PRAGMA, 492 TokenType.RANGE, 493 TokenType.RECURSIVE, 494 TokenType.REFERENCES, 495 TokenType.REFRESH, 496 TokenType.RENAME, 497 TokenType.REPLACE, 498 TokenType.RIGHT, 499 TokenType.ROLLUP, 500 TokenType.ROW, 501 TokenType.ROWS, 502 TokenType.SEMI, 503 TokenType.SET, 504 TokenType.SETTINGS, 505 TokenType.SHOW, 506 TokenType.TEMPORARY, 507 TokenType.TOP, 508 TokenType.TRUE, 509 TokenType.TRUNCATE, 510 TokenType.UNIQUE, 511 TokenType.UNNEST, 512 TokenType.UNPIVOT, 513 TokenType.UPDATE, 514 TokenType.USE, 515 TokenType.VOLATILE, 516 TokenType.WINDOW, 517 *CREATABLES, 518 *SUBQUERY_PREDICATES, 519 *TYPE_TOKENS, 520 *NO_PAREN_FUNCTIONS, 521 } 522 523 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 524 525 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 526 TokenType.ANTI, 527 TokenType.APPLY, 528 TokenType.ASOF, 529 TokenType.FULL, 530 TokenType.LEFT, 531 TokenType.LOCK, 532 TokenType.NATURAL, 533 TokenType.OFFSET, 534 TokenType.RIGHT, 535 TokenType.SEMI, 536 TokenType.WINDOW, 537 } 538 539 ALIAS_TOKENS = ID_VAR_TOKENS 540 541 ARRAY_CONSTRUCTORS = { 542 "ARRAY": exp.Array, 543 "LIST": exp.List, 544 } 545 546 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 547 548 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 549 550 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 551 552 FUNC_TOKENS = { 553 TokenType.COLLATE, 554 TokenType.COMMAND, 555 TokenType.CURRENT_DATE, 556 TokenType.CURRENT_DATETIME, 557 TokenType.CURRENT_TIMESTAMP, 558 TokenType.CURRENT_TIME, 559 TokenType.CURRENT_USER, 560 TokenType.FILTER, 561 TokenType.FIRST, 562 TokenType.FORMAT, 563 TokenType.GLOB, 564 TokenType.IDENTIFIER, 565 TokenType.INDEX, 566 TokenType.ISNULL, 567 TokenType.ILIKE, 568 TokenType.INSERT, 569 TokenType.LIKE, 570 TokenType.MERGE, 571 TokenType.OFFSET, 572 TokenType.PRIMARY_KEY, 573 TokenType.RANGE, 574 TokenType.REPLACE, 575 TokenType.RLIKE, 576 TokenType.ROW, 577 TokenType.UNNEST, 578 TokenType.VAR, 579 TokenType.LEFT, 580 TokenType.RIGHT, 581 TokenType.SEQUENCE, 582 TokenType.DATE, 583 TokenType.DATETIME, 584 TokenType.TABLE, 585 TokenType.TIMESTAMP, 586 TokenType.TIMESTAMPTZ, 587 TokenType.TRUNCATE, 588 TokenType.WINDOW, 589 TokenType.XOR, 590 *TYPE_TOKENS, 591 *SUBQUERY_PREDICATES, 592 } 593 594 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 595 TokenType.AND: exp.And, 596 } 597 598 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 599 TokenType.COLON_EQ: exp.PropertyEQ, 600 } 601 602 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 603 TokenType.OR: exp.Or, 604 } 605 606 EQUALITY = { 607 TokenType.EQ: exp.EQ, 608 TokenType.NEQ: exp.NEQ, 609 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 610 } 611 612 COMPARISON = { 613 TokenType.GT: exp.GT, 614 TokenType.GTE: exp.GTE, 615 TokenType.LT: exp.LT, 616 TokenType.LTE: exp.LTE, 617 } 618 619 BITWISE = { 620 TokenType.AMP: exp.BitwiseAnd, 621 TokenType.CARET: exp.BitwiseXor, 622 TokenType.PIPE: exp.BitwiseOr, 623 } 624 625 TERM = { 626 TokenType.DASH: exp.Sub, 627 TokenType.PLUS: exp.Add, 628 TokenType.MOD: exp.Mod, 629 TokenType.COLLATE: exp.Collate, 630 } 631 632 FACTOR = { 633 TokenType.DIV: exp.IntDiv, 634 TokenType.LR_ARROW: exp.Distance, 635 TokenType.SLASH: exp.Div, 636 TokenType.STAR: exp.Mul, 637 } 638 639 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 640 641 TIMES = { 642 TokenType.TIME, 643 TokenType.TIMETZ, 644 } 645 646 TIMESTAMPS = { 647 TokenType.TIMESTAMP, 648 TokenType.TIMESTAMPTZ, 649 TokenType.TIMESTAMPLTZ, 650 *TIMES, 651 } 652 653 SET_OPERATIONS = { 654 TokenType.UNION, 655 TokenType.INTERSECT, 656 TokenType.EXCEPT, 657 } 658 659 JOIN_METHODS = { 660 TokenType.ASOF, 661 TokenType.NATURAL, 662 TokenType.POSITIONAL, 663 } 664 665 JOIN_SIDES = { 666 TokenType.LEFT, 667 TokenType.RIGHT, 668 TokenType.FULL, 669 } 670 671 JOIN_KINDS = { 672 TokenType.ANTI, 673 TokenType.CROSS, 674 TokenType.INNER, 675 TokenType.OUTER, 676 TokenType.SEMI, 677 TokenType.STRAIGHT_JOIN, 678 } 679 680 JOIN_HINTS: t.Set[str] = set() 681 682 LAMBDAS = { 683 TokenType.ARROW: lambda self, expressions: self.expression( 684 exp.Lambda, 685 this=self._replace_lambda( 686 self._parse_assignment(), 687 expressions, 688 ), 689 expressions=expressions, 690 ), 691 TokenType.FARROW: lambda self, expressions: self.expression( 692 exp.Kwarg, 693 this=exp.var(expressions[0].name), 694 expression=self._parse_assignment(), 695 ), 696 } 697 698 COLUMN_OPERATORS = { 699 TokenType.DOT: None, 700 TokenType.DCOLON: lambda self, this, to: self.expression( 701 exp.Cast if self.STRICT_CAST else exp.TryCast, 702 this=this, 703 to=to, 704 ), 705 TokenType.ARROW: lambda self, this, path: self.expression( 706 exp.JSONExtract, 707 this=this, 708 expression=self.dialect.to_json_path(path), 709 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 710 ), 711 TokenType.DARROW: lambda self, this, path: self.expression( 712 exp.JSONExtractScalar, 713 this=this, 714 expression=self.dialect.to_json_path(path), 715 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 716 ), 717 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 718 exp.JSONBExtract, 719 this=this, 720 expression=path, 721 ), 722 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 723 exp.JSONBExtractScalar, 724 this=this, 725 expression=path, 726 ), 727 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 728 exp.JSONBContains, 729 this=this, 730 expression=key, 731 ), 732 } 733 734 EXPRESSION_PARSERS = { 735 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 736 exp.Column: lambda self: self._parse_column(), 737 exp.Condition: lambda self: self._parse_assignment(), 738 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 739 exp.Expression: lambda self: self._parse_expression(), 740 exp.From: lambda self: self._parse_from(joins=True), 741 exp.Group: lambda self: self._parse_group(), 742 exp.Having: lambda self: self._parse_having(), 743 exp.Identifier: lambda self: self._parse_id_var(), 744 exp.Join: lambda self: self._parse_join(), 745 exp.Lambda: lambda self: self._parse_lambda(), 746 exp.Lateral: lambda self: self._parse_lateral(), 747 exp.Limit: lambda self: self._parse_limit(), 748 exp.Offset: lambda self: self._parse_offset(), 749 exp.Order: lambda self: self._parse_order(), 750 exp.Ordered: lambda self: self._parse_ordered(), 751 exp.Properties: lambda self: self._parse_properties(), 752 exp.Qualify: lambda self: self._parse_qualify(), 753 exp.Returning: lambda self: self._parse_returning(), 754 exp.Select: lambda self: self._parse_select(), 755 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 756 exp.Table: lambda self: self._parse_table_parts(), 757 exp.TableAlias: lambda self: self._parse_table_alias(), 758 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 759 exp.Where: lambda self: self._parse_where(), 760 exp.Window: lambda self: self._parse_named_window(), 761 exp.With: lambda self: self._parse_with(), 762 "JOIN_TYPE": lambda self: self._parse_join_parts(), 763 } 764 765 STATEMENT_PARSERS = { 766 TokenType.ALTER: lambda self: self._parse_alter(), 767 TokenType.BEGIN: lambda self: self._parse_transaction(), 768 TokenType.CACHE: lambda self: self._parse_cache(), 769 TokenType.COMMENT: lambda self: self._parse_comment(), 770 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 771 TokenType.COPY: lambda self: self._parse_copy(), 772 TokenType.CREATE: lambda self: self._parse_create(), 773 TokenType.DELETE: lambda self: self._parse_delete(), 774 TokenType.DESC: lambda self: self._parse_describe(), 775 TokenType.DESCRIBE: lambda self: self._parse_describe(), 776 TokenType.DROP: lambda self: self._parse_drop(), 777 TokenType.INSERT: lambda self: self._parse_insert(), 778 TokenType.KILL: lambda self: self._parse_kill(), 779 TokenType.LOAD: lambda self: self._parse_load(), 780 TokenType.MERGE: lambda self: self._parse_merge(), 781 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 782 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 783 TokenType.REFRESH: lambda self: self._parse_refresh(), 784 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 785 TokenType.SET: lambda self: self._parse_set(), 786 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 787 TokenType.UNCACHE: lambda self: self._parse_uncache(), 788 TokenType.UPDATE: lambda self: self._parse_update(), 789 TokenType.USE: lambda self: self.expression( 790 exp.Use, 791 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 792 this=self._parse_table(schema=False), 793 ), 794 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 795 } 796 797 UNARY_PARSERS = { 798 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 799 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 800 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 801 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 802 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 803 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 804 } 805 806 STRING_PARSERS = { 807 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 808 exp.RawString, this=token.text 809 ), 810 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 811 exp.National, this=token.text 812 ), 813 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 814 TokenType.STRING: lambda self, token: self.expression( 815 exp.Literal, this=token.text, is_string=True 816 ), 817 TokenType.UNICODE_STRING: lambda self, token: self.expression( 818 exp.UnicodeString, 819 this=token.text, 820 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 821 ), 822 } 823 824 NUMERIC_PARSERS = { 825 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 826 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 827 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 828 TokenType.NUMBER: lambda self, token: self.expression( 829 exp.Literal, this=token.text, is_string=False 830 ), 831 } 832 833 PRIMARY_PARSERS = { 834 **STRING_PARSERS, 835 **NUMERIC_PARSERS, 836 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 837 TokenType.NULL: lambda self, _: self.expression(exp.Null), 838 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 839 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 840 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 841 TokenType.STAR: lambda self, _: self.expression( 842 exp.Star, 843 **{ 844 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 845 "replace": self._parse_star_op("REPLACE"), 846 "rename": self._parse_star_op("RENAME"), 847 }, 848 ), 849 } 850 851 PLACEHOLDER_PARSERS = { 852 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 853 TokenType.PARAMETER: lambda self: self._parse_parameter(), 854 TokenType.COLON: lambda self: ( 855 self.expression(exp.Placeholder, this=self._prev.text) 856 if self._match_set(self.ID_VAR_TOKENS) 857 else None 858 ), 859 } 860 861 RANGE_PARSERS = { 862 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 863 TokenType.GLOB: binary_range_parser(exp.Glob), 864 TokenType.ILIKE: binary_range_parser(exp.ILike), 865 TokenType.IN: lambda self, this: self._parse_in(this), 866 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 867 TokenType.IS: lambda self, this: self._parse_is(this), 868 TokenType.LIKE: binary_range_parser(exp.Like), 869 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 870 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 871 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 872 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 873 } 874 875 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 876 "ALLOWED_VALUES": lambda self: self.expression( 877 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 878 ), 879 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 880 "AUTO": lambda self: self._parse_auto_property(), 881 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 882 "BACKUP": lambda self: self.expression( 883 exp.BackupProperty, this=self._parse_var(any_token=True) 884 ), 885 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 886 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 887 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 888 "CHECKSUM": lambda self: self._parse_checksum(), 889 "CLUSTER BY": lambda self: self._parse_cluster(), 890 "CLUSTERED": lambda self: self._parse_clustered_by(), 891 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 892 exp.CollateProperty, **kwargs 893 ), 894 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 895 "CONTAINS": lambda self: self._parse_contains_property(), 896 "COPY": lambda self: self._parse_copy_property(), 897 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 898 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 899 "DEFINER": lambda self: self._parse_definer(), 900 "DETERMINISTIC": lambda self: self.expression( 901 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 902 ), 903 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 904 "DUPLICATE": lambda self: self._parse_duplicate(), 905 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 906 "DISTKEY": lambda self: self._parse_distkey(), 907 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 908 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 909 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 910 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 911 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 912 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 913 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 914 "FREESPACE": lambda self: self._parse_freespace(), 915 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 916 "HEAP": lambda self: self.expression(exp.HeapProperty), 917 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 918 "IMMUTABLE": lambda self: self.expression( 919 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 920 ), 921 "INHERITS": lambda self: self.expression( 922 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 923 ), 924 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 925 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 926 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 927 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 928 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 929 "LIKE": lambda self: self._parse_create_like(), 930 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 931 "LOCK": lambda self: self._parse_locking(), 932 "LOCKING": lambda self: self._parse_locking(), 933 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 934 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 935 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 936 "MODIFIES": lambda self: self._parse_modifies_property(), 937 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 938 "NO": lambda self: self._parse_no_property(), 939 "ON": lambda self: self._parse_on_property(), 940 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 941 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 942 "PARTITION": lambda self: self._parse_partitioned_of(), 943 "PARTITION BY": lambda self: self._parse_partitioned_by(), 944 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 945 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 946 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 947 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 948 "READS": lambda self: self._parse_reads_property(), 949 "REMOTE": lambda self: self._parse_remote_with_connection(), 950 "RETURNS": lambda self: self._parse_returns(), 951 "STRICT": lambda self: self.expression(exp.StrictProperty), 952 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 953 "ROW": lambda self: self._parse_row(), 954 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 955 "SAMPLE": lambda self: self.expression( 956 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 957 ), 958 "SECURE": lambda self: self.expression(exp.SecureProperty), 959 "SECURITY": lambda self: self._parse_security(), 960 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 961 "SETTINGS": lambda self: self._parse_settings_property(), 962 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 963 "SORTKEY": lambda self: self._parse_sortkey(), 964 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 965 "STABLE": lambda self: self.expression( 966 exp.StabilityProperty, this=exp.Literal.string("STABLE") 967 ), 968 "STORED": lambda self: self._parse_stored(), 969 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 970 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 971 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 972 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 973 "TO": lambda self: self._parse_to_table(), 974 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 975 "TRANSFORM": lambda self: self.expression( 976 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 977 ), 978 "TTL": lambda self: self._parse_ttl(), 979 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 980 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 981 "VOLATILE": lambda self: self._parse_volatile_property(), 982 "WITH": lambda self: self._parse_with_property(), 983 } 984 985 CONSTRAINT_PARSERS = { 986 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 987 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 988 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 989 "CHARACTER SET": lambda self: self.expression( 990 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 991 ), 992 "CHECK": lambda self: self.expression( 993 exp.CheckColumnConstraint, 994 this=self._parse_wrapped(self._parse_assignment), 995 enforced=self._match_text_seq("ENFORCED"), 996 ), 997 "COLLATE": lambda self: self.expression( 998 exp.CollateColumnConstraint, 999 this=self._parse_identifier() or self._parse_column(), 1000 ), 1001 "COMMENT": lambda self: self.expression( 1002 exp.CommentColumnConstraint, this=self._parse_string() 1003 ), 1004 "COMPRESS": lambda self: self._parse_compress(), 1005 "CLUSTERED": lambda self: self.expression( 1006 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1007 ), 1008 "NONCLUSTERED": lambda self: self.expression( 1009 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1010 ), 1011 "DEFAULT": lambda self: self.expression( 1012 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1013 ), 1014 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1015 "EPHEMERAL": lambda self: self.expression( 1016 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1017 ), 1018 "EXCLUDE": lambda self: self.expression( 1019 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1020 ), 1021 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1022 "FORMAT": lambda self: self.expression( 1023 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1024 ), 1025 "GENERATED": lambda self: self._parse_generated_as_identity(), 1026 "IDENTITY": lambda self: self._parse_auto_increment(), 1027 "INLINE": lambda self: self._parse_inline(), 1028 "LIKE": lambda self: self._parse_create_like(), 1029 "NOT": lambda self: self._parse_not_constraint(), 1030 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1031 "ON": lambda self: ( 1032 self._match(TokenType.UPDATE) 1033 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1034 ) 1035 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1036 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1037 "PERIOD": lambda self: self._parse_period_for_system_time(), 1038 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1039 "REFERENCES": lambda self: self._parse_references(match=False), 1040 "TITLE": lambda self: self.expression( 1041 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1042 ), 1043 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1044 "UNIQUE": lambda self: self._parse_unique(), 1045 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1046 "WITH": lambda self: self.expression( 1047 exp.Properties, expressions=self._parse_wrapped_properties() 1048 ), 1049 } 1050 1051 ALTER_PARSERS = { 1052 "ADD": lambda self: self._parse_alter_table_add(), 1053 "ALTER": lambda self: self._parse_alter_table_alter(), 1054 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1055 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1056 "DROP": lambda self: self._parse_alter_table_drop(), 1057 "RENAME": lambda self: self._parse_alter_table_rename(), 1058 "SET": lambda self: self._parse_alter_table_set(), 1059 "AS": lambda self: self._parse_select(), 1060 } 1061 1062 ALTER_ALTER_PARSERS = { 1063 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1064 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1065 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1066 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1067 } 1068 1069 SCHEMA_UNNAMED_CONSTRAINTS = { 1070 "CHECK", 1071 "EXCLUDE", 1072 "FOREIGN KEY", 1073 "LIKE", 1074 "PERIOD", 1075 "PRIMARY KEY", 1076 "UNIQUE", 1077 } 1078 1079 NO_PAREN_FUNCTION_PARSERS = { 1080 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1081 "CASE": lambda self: self._parse_case(), 1082 "CONNECT_BY_ROOT": lambda self: self.expression( 1083 exp.ConnectByRoot, this=self._parse_column() 1084 ), 1085 "IF": lambda self: self._parse_if(), 1086 "NEXT": lambda self: self._parse_next_value_for(), 1087 } 1088 1089 INVALID_FUNC_NAME_TOKENS = { 1090 TokenType.IDENTIFIER, 1091 TokenType.STRING, 1092 } 1093 1094 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1095 1096 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1097 1098 FUNCTION_PARSERS = { 1099 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1100 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1101 "DECODE": lambda self: self._parse_decode(), 1102 "EXTRACT": lambda self: self._parse_extract(), 1103 "GAP_FILL": lambda self: self._parse_gap_fill(), 1104 "JSON_OBJECT": lambda self: self._parse_json_object(), 1105 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1106 "JSON_TABLE": lambda self: self._parse_json_table(), 1107 "MATCH": lambda self: self._parse_match_against(), 1108 "NORMALIZE": lambda self: self._parse_normalize(), 1109 "OPENJSON": lambda self: self._parse_open_json(), 1110 "POSITION": lambda self: self._parse_position(), 1111 "PREDICT": lambda self: self._parse_predict(), 1112 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1113 "STRING_AGG": lambda self: self._parse_string_agg(), 1114 "SUBSTRING": lambda self: self._parse_substring(), 1115 "TRIM": lambda self: self._parse_trim(), 1116 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1117 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1118 } 1119 1120 QUERY_MODIFIER_PARSERS = { 1121 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1122 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1123 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1124 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1125 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1126 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1127 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1128 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1129 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1130 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1131 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1132 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1133 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1134 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1135 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1136 TokenType.CLUSTER_BY: lambda self: ( 1137 "cluster", 1138 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1139 ), 1140 TokenType.DISTRIBUTE_BY: lambda self: ( 1141 "distribute", 1142 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1143 ), 1144 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1145 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1146 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1147 } 1148 1149 SET_PARSERS = { 1150 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1151 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1152 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1153 "TRANSACTION": lambda self: self._parse_set_transaction(), 1154 } 1155 1156 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1157 1158 TYPE_LITERAL_PARSERS = { 1159 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1160 } 1161 1162 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1163 1164 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1165 1166 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1167 1168 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1169 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1170 "ISOLATION": ( 1171 ("LEVEL", "REPEATABLE", "READ"), 1172 ("LEVEL", "READ", "COMMITTED"), 1173 ("LEVEL", "READ", "UNCOMITTED"), 1174 ("LEVEL", "SERIALIZABLE"), 1175 ), 1176 "READ": ("WRITE", "ONLY"), 1177 } 1178 1179 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1180 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1181 ) 1182 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1183 1184 CREATE_SEQUENCE: OPTIONS_TYPE = { 1185 "SCALE": ("EXTEND", "NOEXTEND"), 1186 "SHARD": ("EXTEND", "NOEXTEND"), 1187 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1188 **dict.fromkeys( 1189 ( 1190 "SESSION", 1191 "GLOBAL", 1192 "KEEP", 1193 "NOKEEP", 1194 "ORDER", 1195 "NOORDER", 1196 "NOCACHE", 1197 "CYCLE", 1198 "NOCYCLE", 1199 "NOMINVALUE", 1200 "NOMAXVALUE", 1201 "NOSCALE", 1202 "NOSHARD", 1203 ), 1204 tuple(), 1205 ), 1206 } 1207 1208 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1209 1210 USABLES: OPTIONS_TYPE = dict.fromkeys( 1211 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1212 ) 1213 1214 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1215 1216 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1217 "TYPE": ("EVOLUTION",), 1218 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1219 } 1220 1221 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1222 "NOT": ("ENFORCED",), 1223 "MATCH": ( 1224 "FULL", 1225 "PARTIAL", 1226 "SIMPLE", 1227 ), 1228 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1229 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1230 } 1231 1232 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1233 1234 CLONE_KEYWORDS = {"CLONE", "COPY"} 1235 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1236 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1237 1238 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1239 1240 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1241 1242 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1243 1244 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1245 1246 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1247 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1248 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1249 1250 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1251 1252 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1253 1254 ADD_CONSTRAINT_TOKENS = { 1255 TokenType.CONSTRAINT, 1256 TokenType.FOREIGN_KEY, 1257 TokenType.INDEX, 1258 TokenType.KEY, 1259 TokenType.PRIMARY_KEY, 1260 TokenType.UNIQUE, 1261 } 1262 1263 DISTINCT_TOKENS = {TokenType.DISTINCT} 1264 1265 NULL_TOKENS = {TokenType.NULL} 1266 1267 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1268 1269 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1270 1271 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1272 1273 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1274 1275 ODBC_DATETIME_LITERALS = { 1276 "d": exp.Date, 1277 "t": exp.Time, 1278 "ts": exp.Timestamp, 1279 } 1280 1281 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1282 1283 STRICT_CAST = True 1284 1285 PREFIXED_PIVOT_COLUMNS = False 1286 IDENTIFY_PIVOT_STRINGS = False 1287 1288 LOG_DEFAULTS_TO_LN = False 1289 1290 # Whether ADD is present for each column added by ALTER TABLE 1291 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1292 1293 # Whether the table sample clause expects CSV syntax 1294 TABLESAMPLE_CSV = False 1295 1296 # The default method used for table sampling 1297 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1298 1299 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1300 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1301 1302 # Whether the TRIM function expects the characters to trim as its first argument 1303 TRIM_PATTERN_FIRST = False 1304 1305 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1306 STRING_ALIASES = False 1307 1308 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1309 MODIFIERS_ATTACHED_TO_SET_OP = True 1310 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1311 1312 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1313 NO_PAREN_IF_COMMANDS = True 1314 1315 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1316 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1317 1318 # Whether the `:` operator is used to extract a value from a VARIANT column 1319 COLON_IS_VARIANT_EXTRACT = False 1320 1321 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1322 # If this is True and '(' is not found, the keyword will be treated as an identifier 1323 VALUES_FOLLOWED_BY_PAREN = True 1324 1325 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1326 SUPPORTS_IMPLICIT_UNNEST = False 1327 1328 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1329 INTERVAL_SPANS = True 1330 1331 # Whether a PARTITION clause can follow a table reference 1332 SUPPORTS_PARTITION_SELECTION = False 1333 1334 __slots__ = ( 1335 "error_level", 1336 "error_message_context", 1337 "max_errors", 1338 "dialect", 1339 "sql", 1340 "errors", 1341 "_tokens", 1342 "_index", 1343 "_curr", 1344 "_next", 1345 "_prev", 1346 "_prev_comments", 1347 ) 1348 1349 # Autofilled 1350 SHOW_TRIE: t.Dict = {} 1351 SET_TRIE: t.Dict = {} 1352 1353 def __init__( 1354 self, 1355 error_level: t.Optional[ErrorLevel] = None, 1356 error_message_context: int = 100, 1357 max_errors: int = 3, 1358 dialect: DialectType = None, 1359 ): 1360 from sqlglot.dialects import Dialect 1361 1362 self.error_level = error_level or ErrorLevel.IMMEDIATE 1363 self.error_message_context = error_message_context 1364 self.max_errors = max_errors 1365 self.dialect = Dialect.get_or_raise(dialect) 1366 self.reset() 1367 1368 def reset(self): 1369 self.sql = "" 1370 self.errors = [] 1371 self._tokens = [] 1372 self._index = 0 1373 self._curr = None 1374 self._next = None 1375 self._prev = None 1376 self._prev_comments = None 1377 1378 def parse( 1379 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1380 ) -> t.List[t.Optional[exp.Expression]]: 1381 """ 1382 Parses a list of tokens and returns a list of syntax trees, one tree 1383 per parsed SQL statement. 1384 1385 Args: 1386 raw_tokens: The list of tokens. 1387 sql: The original SQL string, used to produce helpful debug messages. 1388 1389 Returns: 1390 The list of the produced syntax trees. 1391 """ 1392 return self._parse( 1393 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1394 ) 1395 1396 def parse_into( 1397 self, 1398 expression_types: exp.IntoType, 1399 raw_tokens: t.List[Token], 1400 sql: t.Optional[str] = None, 1401 ) -> t.List[t.Optional[exp.Expression]]: 1402 """ 1403 Parses a list of tokens into a given Expression type. If a collection of Expression 1404 types is given instead, this method will try to parse the token list into each one 1405 of them, stopping at the first for which the parsing succeeds. 1406 1407 Args: 1408 expression_types: The expression type(s) to try and parse the token list into. 1409 raw_tokens: The list of tokens. 1410 sql: The original SQL string, used to produce helpful debug messages. 1411 1412 Returns: 1413 The target Expression. 1414 """ 1415 errors = [] 1416 for expression_type in ensure_list(expression_types): 1417 parser = self.EXPRESSION_PARSERS.get(expression_type) 1418 if not parser: 1419 raise TypeError(f"No parser registered for {expression_type}") 1420 1421 try: 1422 return self._parse(parser, raw_tokens, sql) 1423 except ParseError as e: 1424 e.errors[0]["into_expression"] = expression_type 1425 errors.append(e) 1426 1427 raise ParseError( 1428 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1429 errors=merge_errors(errors), 1430 ) from errors[-1] 1431 1432 def _parse( 1433 self, 1434 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1435 raw_tokens: t.List[Token], 1436 sql: t.Optional[str] = None, 1437 ) -> t.List[t.Optional[exp.Expression]]: 1438 self.reset() 1439 self.sql = sql or "" 1440 1441 total = len(raw_tokens) 1442 chunks: t.List[t.List[Token]] = [[]] 1443 1444 for i, token in enumerate(raw_tokens): 1445 if token.token_type == TokenType.SEMICOLON: 1446 if token.comments: 1447 chunks.append([token]) 1448 1449 if i < total - 1: 1450 chunks.append([]) 1451 else: 1452 chunks[-1].append(token) 1453 1454 expressions = [] 1455 1456 for tokens in chunks: 1457 self._index = -1 1458 self._tokens = tokens 1459 self._advance() 1460 1461 expressions.append(parse_method(self)) 1462 1463 if self._index < len(self._tokens): 1464 self.raise_error("Invalid expression / Unexpected token") 1465 1466 self.check_errors() 1467 1468 return expressions 1469 1470 def check_errors(self) -> None: 1471 """Logs or raises any found errors, depending on the chosen error level setting.""" 1472 if self.error_level == ErrorLevel.WARN: 1473 for error in self.errors: 1474 logger.error(str(error)) 1475 elif self.error_level == ErrorLevel.RAISE and self.errors: 1476 raise ParseError( 1477 concat_messages(self.errors, self.max_errors), 1478 errors=merge_errors(self.errors), 1479 ) 1480 1481 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1482 """ 1483 Appends an error in the list of recorded errors or raises it, depending on the chosen 1484 error level setting. 1485 """ 1486 token = token or self._curr or self._prev or Token.string("") 1487 start = token.start 1488 end = token.end + 1 1489 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1490 highlight = self.sql[start:end] 1491 end_context = self.sql[end : end + self.error_message_context] 1492 1493 error = ParseError.new( 1494 f"{message}. Line {token.line}, Col: {token.col}.\n" 1495 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1496 description=message, 1497 line=token.line, 1498 col=token.col, 1499 start_context=start_context, 1500 highlight=highlight, 1501 end_context=end_context, 1502 ) 1503 1504 if self.error_level == ErrorLevel.IMMEDIATE: 1505 raise error 1506 1507 self.errors.append(error) 1508 1509 def expression( 1510 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1511 ) -> E: 1512 """ 1513 Creates a new, validated Expression. 1514 1515 Args: 1516 exp_class: The expression class to instantiate. 1517 comments: An optional list of comments to attach to the expression. 1518 kwargs: The arguments to set for the expression along with their respective values. 1519 1520 Returns: 1521 The target expression. 1522 """ 1523 instance = exp_class(**kwargs) 1524 instance.add_comments(comments) if comments else self._add_comments(instance) 1525 return self.validate_expression(instance) 1526 1527 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1528 if expression and self._prev_comments: 1529 expression.add_comments(self._prev_comments) 1530 self._prev_comments = None 1531 1532 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1533 """ 1534 Validates an Expression, making sure that all its mandatory arguments are set. 1535 1536 Args: 1537 expression: The expression to validate. 1538 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1539 1540 Returns: 1541 The validated expression. 1542 """ 1543 if self.error_level != ErrorLevel.IGNORE: 1544 for error_message in expression.error_messages(args): 1545 self.raise_error(error_message) 1546 1547 return expression 1548 1549 def _find_sql(self, start: Token, end: Token) -> str: 1550 return self.sql[start.start : end.end + 1] 1551 1552 def _is_connected(self) -> bool: 1553 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1554 1555 def _advance(self, times: int = 1) -> None: 1556 self._index += times 1557 self._curr = seq_get(self._tokens, self._index) 1558 self._next = seq_get(self._tokens, self._index + 1) 1559 1560 if self._index > 0: 1561 self._prev = self._tokens[self._index - 1] 1562 self._prev_comments = self._prev.comments 1563 else: 1564 self._prev = None 1565 self._prev_comments = None 1566 1567 def _retreat(self, index: int) -> None: 1568 if index != self._index: 1569 self._advance(index - self._index) 1570 1571 def _warn_unsupported(self) -> None: 1572 if len(self._tokens) <= 1: 1573 return 1574 1575 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1576 # interested in emitting a warning for the one being currently processed. 1577 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1578 1579 logger.warning( 1580 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1581 ) 1582 1583 def _parse_command(self) -> exp.Command: 1584 self._warn_unsupported() 1585 return self.expression( 1586 exp.Command, 1587 comments=self._prev_comments, 1588 this=self._prev.text.upper(), 1589 expression=self._parse_string(), 1590 ) 1591 1592 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1593 """ 1594 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1595 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1596 solve this by setting & resetting the parser state accordingly 1597 """ 1598 index = self._index 1599 error_level = self.error_level 1600 1601 self.error_level = ErrorLevel.IMMEDIATE 1602 try: 1603 this = parse_method() 1604 except ParseError: 1605 this = None 1606 finally: 1607 if not this or retreat: 1608 self._retreat(index) 1609 self.error_level = error_level 1610 1611 return this 1612 1613 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1614 start = self._prev 1615 exists = self._parse_exists() if allow_exists else None 1616 1617 self._match(TokenType.ON) 1618 1619 materialized = self._match_text_seq("MATERIALIZED") 1620 kind = self._match_set(self.CREATABLES) and self._prev 1621 if not kind: 1622 return self._parse_as_command(start) 1623 1624 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1625 this = self._parse_user_defined_function(kind=kind.token_type) 1626 elif kind.token_type == TokenType.TABLE: 1627 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1628 elif kind.token_type == TokenType.COLUMN: 1629 this = self._parse_column() 1630 else: 1631 this = self._parse_id_var() 1632 1633 self._match(TokenType.IS) 1634 1635 return self.expression( 1636 exp.Comment, 1637 this=this, 1638 kind=kind.text, 1639 expression=self._parse_string(), 1640 exists=exists, 1641 materialized=materialized, 1642 ) 1643 1644 def _parse_to_table( 1645 self, 1646 ) -> exp.ToTableProperty: 1647 table = self._parse_table_parts(schema=True) 1648 return self.expression(exp.ToTableProperty, this=table) 1649 1650 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1651 def _parse_ttl(self) -> exp.Expression: 1652 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1653 this = self._parse_bitwise() 1654 1655 if self._match_text_seq("DELETE"): 1656 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1657 if self._match_text_seq("RECOMPRESS"): 1658 return self.expression( 1659 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1660 ) 1661 if self._match_text_seq("TO", "DISK"): 1662 return self.expression( 1663 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1664 ) 1665 if self._match_text_seq("TO", "VOLUME"): 1666 return self.expression( 1667 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1668 ) 1669 1670 return this 1671 1672 expressions = self._parse_csv(_parse_ttl_action) 1673 where = self._parse_where() 1674 group = self._parse_group() 1675 1676 aggregates = None 1677 if group and self._match(TokenType.SET): 1678 aggregates = self._parse_csv(self._parse_set_item) 1679 1680 return self.expression( 1681 exp.MergeTreeTTL, 1682 expressions=expressions, 1683 where=where, 1684 group=group, 1685 aggregates=aggregates, 1686 ) 1687 1688 def _parse_statement(self) -> t.Optional[exp.Expression]: 1689 if self._curr is None: 1690 return None 1691 1692 if self._match_set(self.STATEMENT_PARSERS): 1693 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1694 1695 if self._match_set(self.dialect.tokenizer.COMMANDS): 1696 return self._parse_command() 1697 1698 expression = self._parse_expression() 1699 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1700 return self._parse_query_modifiers(expression) 1701 1702 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1703 start = self._prev 1704 temporary = self._match(TokenType.TEMPORARY) 1705 materialized = self._match_text_seq("MATERIALIZED") 1706 1707 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1708 if not kind: 1709 return self._parse_as_command(start) 1710 1711 concurrently = self._match_text_seq("CONCURRENTLY") 1712 if_exists = exists or self._parse_exists() 1713 table = self._parse_table_parts( 1714 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1715 ) 1716 1717 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1718 1719 if self._match(TokenType.L_PAREN, advance=False): 1720 expressions = self._parse_wrapped_csv(self._parse_types) 1721 else: 1722 expressions = None 1723 1724 return self.expression( 1725 exp.Drop, 1726 comments=start.comments, 1727 exists=if_exists, 1728 this=table, 1729 expressions=expressions, 1730 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1731 temporary=temporary, 1732 materialized=materialized, 1733 cascade=self._match_text_seq("CASCADE"), 1734 constraints=self._match_text_seq("CONSTRAINTS"), 1735 purge=self._match_text_seq("PURGE"), 1736 cluster=cluster, 1737 concurrently=concurrently, 1738 ) 1739 1740 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1741 return ( 1742 self._match_text_seq("IF") 1743 and (not not_ or self._match(TokenType.NOT)) 1744 and self._match(TokenType.EXISTS) 1745 ) 1746 1747 def _parse_create(self) -> exp.Create | exp.Command: 1748 # Note: this can't be None because we've matched a statement parser 1749 start = self._prev 1750 comments = self._prev_comments 1751 1752 replace = ( 1753 start.token_type == TokenType.REPLACE 1754 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1755 or self._match_pair(TokenType.OR, TokenType.ALTER) 1756 ) 1757 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1758 1759 unique = self._match(TokenType.UNIQUE) 1760 1761 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1762 clustered = True 1763 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1764 "COLUMNSTORE" 1765 ): 1766 clustered = False 1767 else: 1768 clustered = None 1769 1770 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1771 self._advance() 1772 1773 properties = None 1774 create_token = self._match_set(self.CREATABLES) and self._prev 1775 1776 if not create_token: 1777 # exp.Properties.Location.POST_CREATE 1778 properties = self._parse_properties() 1779 create_token = self._match_set(self.CREATABLES) and self._prev 1780 1781 if not properties or not create_token: 1782 return self._parse_as_command(start) 1783 1784 concurrently = self._match_text_seq("CONCURRENTLY") 1785 exists = self._parse_exists(not_=True) 1786 this = None 1787 expression: t.Optional[exp.Expression] = None 1788 indexes = None 1789 no_schema_binding = None 1790 begin = None 1791 end = None 1792 clone = None 1793 1794 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1795 nonlocal properties 1796 if properties and temp_props: 1797 properties.expressions.extend(temp_props.expressions) 1798 elif temp_props: 1799 properties = temp_props 1800 1801 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1802 this = self._parse_user_defined_function(kind=create_token.token_type) 1803 1804 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1805 extend_props(self._parse_properties()) 1806 1807 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1808 extend_props(self._parse_properties()) 1809 1810 if not expression: 1811 if self._match(TokenType.COMMAND): 1812 expression = self._parse_as_command(self._prev) 1813 else: 1814 begin = self._match(TokenType.BEGIN) 1815 return_ = self._match_text_seq("RETURN") 1816 1817 if self._match(TokenType.STRING, advance=False): 1818 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1819 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1820 expression = self._parse_string() 1821 extend_props(self._parse_properties()) 1822 else: 1823 expression = self._parse_statement() 1824 1825 end = self._match_text_seq("END") 1826 1827 if return_: 1828 expression = self.expression(exp.Return, this=expression) 1829 elif create_token.token_type == TokenType.INDEX: 1830 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1831 if not self._match(TokenType.ON): 1832 index = self._parse_id_var() 1833 anonymous = False 1834 else: 1835 index = None 1836 anonymous = True 1837 1838 this = self._parse_index(index=index, anonymous=anonymous) 1839 elif create_token.token_type in self.DB_CREATABLES: 1840 table_parts = self._parse_table_parts( 1841 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1842 ) 1843 1844 # exp.Properties.Location.POST_NAME 1845 self._match(TokenType.COMMA) 1846 extend_props(self._parse_properties(before=True)) 1847 1848 this = self._parse_schema(this=table_parts) 1849 1850 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1851 extend_props(self._parse_properties()) 1852 1853 self._match(TokenType.ALIAS) 1854 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1855 # exp.Properties.Location.POST_ALIAS 1856 extend_props(self._parse_properties()) 1857 1858 if create_token.token_type == TokenType.SEQUENCE: 1859 expression = self._parse_types() 1860 extend_props(self._parse_properties()) 1861 else: 1862 expression = self._parse_ddl_select() 1863 1864 if create_token.token_type == TokenType.TABLE: 1865 # exp.Properties.Location.POST_EXPRESSION 1866 extend_props(self._parse_properties()) 1867 1868 indexes = [] 1869 while True: 1870 index = self._parse_index() 1871 1872 # exp.Properties.Location.POST_INDEX 1873 extend_props(self._parse_properties()) 1874 if not index: 1875 break 1876 else: 1877 self._match(TokenType.COMMA) 1878 indexes.append(index) 1879 elif create_token.token_type == TokenType.VIEW: 1880 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1881 no_schema_binding = True 1882 1883 shallow = self._match_text_seq("SHALLOW") 1884 1885 if self._match_texts(self.CLONE_KEYWORDS): 1886 copy = self._prev.text.lower() == "copy" 1887 clone = self.expression( 1888 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1889 ) 1890 1891 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1892 return self._parse_as_command(start) 1893 1894 create_kind_text = create_token.text.upper() 1895 return self.expression( 1896 exp.Create, 1897 comments=comments, 1898 this=this, 1899 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1900 replace=replace, 1901 refresh=refresh, 1902 unique=unique, 1903 expression=expression, 1904 exists=exists, 1905 properties=properties, 1906 indexes=indexes, 1907 no_schema_binding=no_schema_binding, 1908 begin=begin, 1909 end=end, 1910 clone=clone, 1911 concurrently=concurrently, 1912 clustered=clustered, 1913 ) 1914 1915 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1916 seq = exp.SequenceProperties() 1917 1918 options = [] 1919 index = self._index 1920 1921 while self._curr: 1922 self._match(TokenType.COMMA) 1923 if self._match_text_seq("INCREMENT"): 1924 self._match_text_seq("BY") 1925 self._match_text_seq("=") 1926 seq.set("increment", self._parse_term()) 1927 elif self._match_text_seq("MINVALUE"): 1928 seq.set("minvalue", self._parse_term()) 1929 elif self._match_text_seq("MAXVALUE"): 1930 seq.set("maxvalue", self._parse_term()) 1931 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1932 self._match_text_seq("=") 1933 seq.set("start", self._parse_term()) 1934 elif self._match_text_seq("CACHE"): 1935 # T-SQL allows empty CACHE which is initialized dynamically 1936 seq.set("cache", self._parse_number() or True) 1937 elif self._match_text_seq("OWNED", "BY"): 1938 # "OWNED BY NONE" is the default 1939 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1940 else: 1941 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1942 if opt: 1943 options.append(opt) 1944 else: 1945 break 1946 1947 seq.set("options", options if options else None) 1948 return None if self._index == index else seq 1949 1950 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1951 # only used for teradata currently 1952 self._match(TokenType.COMMA) 1953 1954 kwargs = { 1955 "no": self._match_text_seq("NO"), 1956 "dual": self._match_text_seq("DUAL"), 1957 "before": self._match_text_seq("BEFORE"), 1958 "default": self._match_text_seq("DEFAULT"), 1959 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1960 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1961 "after": self._match_text_seq("AFTER"), 1962 "minimum": self._match_texts(("MIN", "MINIMUM")), 1963 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1964 } 1965 1966 if self._match_texts(self.PROPERTY_PARSERS): 1967 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1968 try: 1969 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1970 except TypeError: 1971 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1972 1973 return None 1974 1975 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1976 return self._parse_wrapped_csv(self._parse_property) 1977 1978 def _parse_property(self) -> t.Optional[exp.Expression]: 1979 if self._match_texts(self.PROPERTY_PARSERS): 1980 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1981 1982 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1983 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1984 1985 if self._match_text_seq("COMPOUND", "SORTKEY"): 1986 return self._parse_sortkey(compound=True) 1987 1988 if self._match_text_seq("SQL", "SECURITY"): 1989 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1990 1991 index = self._index 1992 key = self._parse_column() 1993 1994 if not self._match(TokenType.EQ): 1995 self._retreat(index) 1996 return self._parse_sequence_properties() 1997 1998 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1999 if isinstance(key, exp.Column): 2000 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2001 2002 value = self._parse_bitwise() or self._parse_var(any_token=True) 2003 2004 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2005 if isinstance(value, exp.Column): 2006 value = exp.var(value.name) 2007 2008 return self.expression(exp.Property, this=key, value=value) 2009 2010 def _parse_stored(self) -> exp.FileFormatProperty: 2011 self._match(TokenType.ALIAS) 2012 2013 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2014 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2015 2016 return self.expression( 2017 exp.FileFormatProperty, 2018 this=( 2019 self.expression( 2020 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2021 ) 2022 if input_format or output_format 2023 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2024 ), 2025 ) 2026 2027 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2028 field = self._parse_field() 2029 if isinstance(field, exp.Identifier) and not field.quoted: 2030 field = exp.var(field) 2031 2032 return field 2033 2034 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2035 self._match(TokenType.EQ) 2036 self._match(TokenType.ALIAS) 2037 2038 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2039 2040 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2041 properties = [] 2042 while True: 2043 if before: 2044 prop = self._parse_property_before() 2045 else: 2046 prop = self._parse_property() 2047 if not prop: 2048 break 2049 for p in ensure_list(prop): 2050 properties.append(p) 2051 2052 if properties: 2053 return self.expression(exp.Properties, expressions=properties) 2054 2055 return None 2056 2057 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2058 return self.expression( 2059 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2060 ) 2061 2062 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2063 if self._match_texts(("DEFINER", "INVOKER")): 2064 security_specifier = self._prev.text.upper() 2065 return self.expression(exp.SecurityProperty, this=security_specifier) 2066 return None 2067 2068 def _parse_settings_property(self) -> exp.SettingsProperty: 2069 return self.expression( 2070 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2071 ) 2072 2073 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2074 if self._index >= 2: 2075 pre_volatile_token = self._tokens[self._index - 2] 2076 else: 2077 pre_volatile_token = None 2078 2079 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2080 return exp.VolatileProperty() 2081 2082 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2083 2084 def _parse_retention_period(self) -> exp.Var: 2085 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2086 number = self._parse_number() 2087 number_str = f"{number} " if number else "" 2088 unit = self._parse_var(any_token=True) 2089 return exp.var(f"{number_str}{unit}") 2090 2091 def _parse_system_versioning_property( 2092 self, with_: bool = False 2093 ) -> exp.WithSystemVersioningProperty: 2094 self._match(TokenType.EQ) 2095 prop = self.expression( 2096 exp.WithSystemVersioningProperty, 2097 **{ # type: ignore 2098 "on": True, 2099 "with": with_, 2100 }, 2101 ) 2102 2103 if self._match_text_seq("OFF"): 2104 prop.set("on", False) 2105 return prop 2106 2107 self._match(TokenType.ON) 2108 if self._match(TokenType.L_PAREN): 2109 while self._curr and not self._match(TokenType.R_PAREN): 2110 if self._match_text_seq("HISTORY_TABLE", "="): 2111 prop.set("this", self._parse_table_parts()) 2112 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2113 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2114 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2115 prop.set("retention_period", self._parse_retention_period()) 2116 2117 self._match(TokenType.COMMA) 2118 2119 return prop 2120 2121 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2122 self._match(TokenType.EQ) 2123 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2124 prop = self.expression(exp.DataDeletionProperty, on=on) 2125 2126 if self._match(TokenType.L_PAREN): 2127 while self._curr and not self._match(TokenType.R_PAREN): 2128 if self._match_text_seq("FILTER_COLUMN", "="): 2129 prop.set("filter_column", self._parse_column()) 2130 elif self._match_text_seq("RETENTION_PERIOD", "="): 2131 prop.set("retention_period", self._parse_retention_period()) 2132 2133 self._match(TokenType.COMMA) 2134 2135 return prop 2136 2137 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2138 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2139 prop = self._parse_system_versioning_property(with_=True) 2140 self._match_r_paren() 2141 return prop 2142 2143 if self._match(TokenType.L_PAREN, advance=False): 2144 return self._parse_wrapped_properties() 2145 2146 if self._match_text_seq("JOURNAL"): 2147 return self._parse_withjournaltable() 2148 2149 if self._match_texts(self.VIEW_ATTRIBUTES): 2150 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2151 2152 if self._match_text_seq("DATA"): 2153 return self._parse_withdata(no=False) 2154 elif self._match_text_seq("NO", "DATA"): 2155 return self._parse_withdata(no=True) 2156 2157 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2158 return self._parse_serde_properties(with_=True) 2159 2160 if self._match(TokenType.SCHEMA): 2161 return self.expression( 2162 exp.WithSchemaBindingProperty, 2163 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2164 ) 2165 2166 if not self._next: 2167 return None 2168 2169 return self._parse_withisolatedloading() 2170 2171 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2172 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2173 self._match(TokenType.EQ) 2174 2175 user = self._parse_id_var() 2176 self._match(TokenType.PARAMETER) 2177 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2178 2179 if not user or not host: 2180 return None 2181 2182 return exp.DefinerProperty(this=f"{user}@{host}") 2183 2184 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2185 self._match(TokenType.TABLE) 2186 self._match(TokenType.EQ) 2187 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2188 2189 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2190 return self.expression(exp.LogProperty, no=no) 2191 2192 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2193 return self.expression(exp.JournalProperty, **kwargs) 2194 2195 def _parse_checksum(self) -> exp.ChecksumProperty: 2196 self._match(TokenType.EQ) 2197 2198 on = None 2199 if self._match(TokenType.ON): 2200 on = True 2201 elif self._match_text_seq("OFF"): 2202 on = False 2203 2204 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2205 2206 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2207 return self.expression( 2208 exp.Cluster, 2209 expressions=( 2210 self._parse_wrapped_csv(self._parse_ordered) 2211 if wrapped 2212 else self._parse_csv(self._parse_ordered) 2213 ), 2214 ) 2215 2216 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2217 self._match_text_seq("BY") 2218 2219 self._match_l_paren() 2220 expressions = self._parse_csv(self._parse_column) 2221 self._match_r_paren() 2222 2223 if self._match_text_seq("SORTED", "BY"): 2224 self._match_l_paren() 2225 sorted_by = self._parse_csv(self._parse_ordered) 2226 self._match_r_paren() 2227 else: 2228 sorted_by = None 2229 2230 self._match(TokenType.INTO) 2231 buckets = self._parse_number() 2232 self._match_text_seq("BUCKETS") 2233 2234 return self.expression( 2235 exp.ClusteredByProperty, 2236 expressions=expressions, 2237 sorted_by=sorted_by, 2238 buckets=buckets, 2239 ) 2240 2241 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2242 if not self._match_text_seq("GRANTS"): 2243 self._retreat(self._index - 1) 2244 return None 2245 2246 return self.expression(exp.CopyGrantsProperty) 2247 2248 def _parse_freespace(self) -> exp.FreespaceProperty: 2249 self._match(TokenType.EQ) 2250 return self.expression( 2251 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2252 ) 2253 2254 def _parse_mergeblockratio( 2255 self, no: bool = False, default: bool = False 2256 ) -> exp.MergeBlockRatioProperty: 2257 if self._match(TokenType.EQ): 2258 return self.expression( 2259 exp.MergeBlockRatioProperty, 2260 this=self._parse_number(), 2261 percent=self._match(TokenType.PERCENT), 2262 ) 2263 2264 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2265 2266 def _parse_datablocksize( 2267 self, 2268 default: t.Optional[bool] = None, 2269 minimum: t.Optional[bool] = None, 2270 maximum: t.Optional[bool] = None, 2271 ) -> exp.DataBlocksizeProperty: 2272 self._match(TokenType.EQ) 2273 size = self._parse_number() 2274 2275 units = None 2276 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2277 units = self._prev.text 2278 2279 return self.expression( 2280 exp.DataBlocksizeProperty, 2281 size=size, 2282 units=units, 2283 default=default, 2284 minimum=minimum, 2285 maximum=maximum, 2286 ) 2287 2288 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2289 self._match(TokenType.EQ) 2290 always = self._match_text_seq("ALWAYS") 2291 manual = self._match_text_seq("MANUAL") 2292 never = self._match_text_seq("NEVER") 2293 default = self._match_text_seq("DEFAULT") 2294 2295 autotemp = None 2296 if self._match_text_seq("AUTOTEMP"): 2297 autotemp = self._parse_schema() 2298 2299 return self.expression( 2300 exp.BlockCompressionProperty, 2301 always=always, 2302 manual=manual, 2303 never=never, 2304 default=default, 2305 autotemp=autotemp, 2306 ) 2307 2308 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2309 index = self._index 2310 no = self._match_text_seq("NO") 2311 concurrent = self._match_text_seq("CONCURRENT") 2312 2313 if not self._match_text_seq("ISOLATED", "LOADING"): 2314 self._retreat(index) 2315 return None 2316 2317 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2318 return self.expression( 2319 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2320 ) 2321 2322 def _parse_locking(self) -> exp.LockingProperty: 2323 if self._match(TokenType.TABLE): 2324 kind = "TABLE" 2325 elif self._match(TokenType.VIEW): 2326 kind = "VIEW" 2327 elif self._match(TokenType.ROW): 2328 kind = "ROW" 2329 elif self._match_text_seq("DATABASE"): 2330 kind = "DATABASE" 2331 else: 2332 kind = None 2333 2334 if kind in ("DATABASE", "TABLE", "VIEW"): 2335 this = self._parse_table_parts() 2336 else: 2337 this = None 2338 2339 if self._match(TokenType.FOR): 2340 for_or_in = "FOR" 2341 elif self._match(TokenType.IN): 2342 for_or_in = "IN" 2343 else: 2344 for_or_in = None 2345 2346 if self._match_text_seq("ACCESS"): 2347 lock_type = "ACCESS" 2348 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2349 lock_type = "EXCLUSIVE" 2350 elif self._match_text_seq("SHARE"): 2351 lock_type = "SHARE" 2352 elif self._match_text_seq("READ"): 2353 lock_type = "READ" 2354 elif self._match_text_seq("WRITE"): 2355 lock_type = "WRITE" 2356 elif self._match_text_seq("CHECKSUM"): 2357 lock_type = "CHECKSUM" 2358 else: 2359 lock_type = None 2360 2361 override = self._match_text_seq("OVERRIDE") 2362 2363 return self.expression( 2364 exp.LockingProperty, 2365 this=this, 2366 kind=kind, 2367 for_or_in=for_or_in, 2368 lock_type=lock_type, 2369 override=override, 2370 ) 2371 2372 def _parse_partition_by(self) -> t.List[exp.Expression]: 2373 if self._match(TokenType.PARTITION_BY): 2374 return self._parse_csv(self._parse_assignment) 2375 return [] 2376 2377 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2378 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2379 if self._match_text_seq("MINVALUE"): 2380 return exp.var("MINVALUE") 2381 if self._match_text_seq("MAXVALUE"): 2382 return exp.var("MAXVALUE") 2383 return self._parse_bitwise() 2384 2385 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2386 expression = None 2387 from_expressions = None 2388 to_expressions = None 2389 2390 if self._match(TokenType.IN): 2391 this = self._parse_wrapped_csv(self._parse_bitwise) 2392 elif self._match(TokenType.FROM): 2393 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2394 self._match_text_seq("TO") 2395 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2396 elif self._match_text_seq("WITH", "(", "MODULUS"): 2397 this = self._parse_number() 2398 self._match_text_seq(",", "REMAINDER") 2399 expression = self._parse_number() 2400 self._match_r_paren() 2401 else: 2402 self.raise_error("Failed to parse partition bound spec.") 2403 2404 return self.expression( 2405 exp.PartitionBoundSpec, 2406 this=this, 2407 expression=expression, 2408 from_expressions=from_expressions, 2409 to_expressions=to_expressions, 2410 ) 2411 2412 # https://www.postgresql.org/docs/current/sql-createtable.html 2413 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2414 if not self._match_text_seq("OF"): 2415 self._retreat(self._index - 1) 2416 return None 2417 2418 this = self._parse_table(schema=True) 2419 2420 if self._match(TokenType.DEFAULT): 2421 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2422 elif self._match_text_seq("FOR", "VALUES"): 2423 expression = self._parse_partition_bound_spec() 2424 else: 2425 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2426 2427 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2428 2429 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2430 self._match(TokenType.EQ) 2431 return self.expression( 2432 exp.PartitionedByProperty, 2433 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2434 ) 2435 2436 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2437 if self._match_text_seq("AND", "STATISTICS"): 2438 statistics = True 2439 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2440 statistics = False 2441 else: 2442 statistics = None 2443 2444 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2445 2446 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2447 if self._match_text_seq("SQL"): 2448 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2449 return None 2450 2451 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2452 if self._match_text_seq("SQL", "DATA"): 2453 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2454 return None 2455 2456 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2457 if self._match_text_seq("PRIMARY", "INDEX"): 2458 return exp.NoPrimaryIndexProperty() 2459 if self._match_text_seq("SQL"): 2460 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2461 return None 2462 2463 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2464 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2465 return exp.OnCommitProperty() 2466 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2467 return exp.OnCommitProperty(delete=True) 2468 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2469 2470 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2471 if self._match_text_seq("SQL", "DATA"): 2472 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2473 return None 2474 2475 def _parse_distkey(self) -> exp.DistKeyProperty: 2476 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2477 2478 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2479 table = self._parse_table(schema=True) 2480 2481 options = [] 2482 while self._match_texts(("INCLUDING", "EXCLUDING")): 2483 this = self._prev.text.upper() 2484 2485 id_var = self._parse_id_var() 2486 if not id_var: 2487 return None 2488 2489 options.append( 2490 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2491 ) 2492 2493 return self.expression(exp.LikeProperty, this=table, expressions=options) 2494 2495 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2496 return self.expression( 2497 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2498 ) 2499 2500 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2501 self._match(TokenType.EQ) 2502 return self.expression( 2503 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2504 ) 2505 2506 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2507 self._match_text_seq("WITH", "CONNECTION") 2508 return self.expression( 2509 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2510 ) 2511 2512 def _parse_returns(self) -> exp.ReturnsProperty: 2513 value: t.Optional[exp.Expression] 2514 null = None 2515 is_table = self._match(TokenType.TABLE) 2516 2517 if is_table: 2518 if self._match(TokenType.LT): 2519 value = self.expression( 2520 exp.Schema, 2521 this="TABLE", 2522 expressions=self._parse_csv(self._parse_struct_types), 2523 ) 2524 if not self._match(TokenType.GT): 2525 self.raise_error("Expecting >") 2526 else: 2527 value = self._parse_schema(exp.var("TABLE")) 2528 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2529 null = True 2530 value = None 2531 else: 2532 value = self._parse_types() 2533 2534 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2535 2536 def _parse_describe(self) -> exp.Describe: 2537 kind = self._match_set(self.CREATABLES) and self._prev.text 2538 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2539 if self._match(TokenType.DOT): 2540 style = None 2541 self._retreat(self._index - 2) 2542 this = self._parse_table(schema=True) 2543 properties = self._parse_properties() 2544 expressions = properties.expressions if properties else None 2545 partition = self._parse_partition() 2546 return self.expression( 2547 exp.Describe, 2548 this=this, 2549 style=style, 2550 kind=kind, 2551 expressions=expressions, 2552 partition=partition, 2553 ) 2554 2555 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2556 kind = self._prev.text.upper() 2557 expressions = [] 2558 2559 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2560 if self._match(TokenType.WHEN): 2561 expression = self._parse_disjunction() 2562 self._match(TokenType.THEN) 2563 else: 2564 expression = None 2565 2566 else_ = self._match(TokenType.ELSE) 2567 2568 if not self._match(TokenType.INTO): 2569 return None 2570 2571 return self.expression( 2572 exp.ConditionalInsert, 2573 this=self.expression( 2574 exp.Insert, 2575 this=self._parse_table(schema=True), 2576 expression=self._parse_derived_table_values(), 2577 ), 2578 expression=expression, 2579 else_=else_, 2580 ) 2581 2582 expression = parse_conditional_insert() 2583 while expression is not None: 2584 expressions.append(expression) 2585 expression = parse_conditional_insert() 2586 2587 return self.expression( 2588 exp.MultitableInserts, 2589 kind=kind, 2590 comments=comments, 2591 expressions=expressions, 2592 source=self._parse_table(), 2593 ) 2594 2595 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2596 comments = ensure_list(self._prev_comments) 2597 hint = self._parse_hint() 2598 overwrite = self._match(TokenType.OVERWRITE) 2599 ignore = self._match(TokenType.IGNORE) 2600 local = self._match_text_seq("LOCAL") 2601 alternative = None 2602 is_function = None 2603 2604 if self._match_text_seq("DIRECTORY"): 2605 this: t.Optional[exp.Expression] = self.expression( 2606 exp.Directory, 2607 this=self._parse_var_or_string(), 2608 local=local, 2609 row_format=self._parse_row_format(match_row=True), 2610 ) 2611 else: 2612 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2613 comments += ensure_list(self._prev_comments) 2614 return self._parse_multitable_inserts(comments) 2615 2616 if self._match(TokenType.OR): 2617 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2618 2619 self._match(TokenType.INTO) 2620 comments += ensure_list(self._prev_comments) 2621 self._match(TokenType.TABLE) 2622 is_function = self._match(TokenType.FUNCTION) 2623 2624 this = ( 2625 self._parse_table(schema=True, parse_partition=True) 2626 if not is_function 2627 else self._parse_function() 2628 ) 2629 2630 returning = self._parse_returning() 2631 2632 return self.expression( 2633 exp.Insert, 2634 comments=comments, 2635 hint=hint, 2636 is_function=is_function, 2637 this=this, 2638 stored=self._match_text_seq("STORED") and self._parse_stored(), 2639 by_name=self._match_text_seq("BY", "NAME"), 2640 exists=self._parse_exists(), 2641 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2642 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2643 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2644 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2645 conflict=self._parse_on_conflict(), 2646 returning=returning or self._parse_returning(), 2647 overwrite=overwrite, 2648 alternative=alternative, 2649 ignore=ignore, 2650 source=self._match(TokenType.TABLE) and self._parse_table(), 2651 ) 2652 2653 def _parse_kill(self) -> exp.Kill: 2654 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2655 2656 return self.expression( 2657 exp.Kill, 2658 this=self._parse_primary(), 2659 kind=kind, 2660 ) 2661 2662 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2663 conflict = self._match_text_seq("ON", "CONFLICT") 2664 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2665 2666 if not conflict and not duplicate: 2667 return None 2668 2669 conflict_keys = None 2670 constraint = None 2671 2672 if conflict: 2673 if self._match_text_seq("ON", "CONSTRAINT"): 2674 constraint = self._parse_id_var() 2675 elif self._match(TokenType.L_PAREN): 2676 conflict_keys = self._parse_csv(self._parse_id_var) 2677 self._match_r_paren() 2678 2679 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2680 if self._prev.token_type == TokenType.UPDATE: 2681 self._match(TokenType.SET) 2682 expressions = self._parse_csv(self._parse_equality) 2683 else: 2684 expressions = None 2685 2686 return self.expression( 2687 exp.OnConflict, 2688 duplicate=duplicate, 2689 expressions=expressions, 2690 action=action, 2691 conflict_keys=conflict_keys, 2692 constraint=constraint, 2693 ) 2694 2695 def _parse_returning(self) -> t.Optional[exp.Returning]: 2696 if not self._match(TokenType.RETURNING): 2697 return None 2698 return self.expression( 2699 exp.Returning, 2700 expressions=self._parse_csv(self._parse_expression), 2701 into=self._match(TokenType.INTO) and self._parse_table_part(), 2702 ) 2703 2704 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2705 if not self._match(TokenType.FORMAT): 2706 return None 2707 return self._parse_row_format() 2708 2709 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2710 index = self._index 2711 with_ = with_ or self._match_text_seq("WITH") 2712 2713 if not self._match(TokenType.SERDE_PROPERTIES): 2714 self._retreat(index) 2715 return None 2716 return self.expression( 2717 exp.SerdeProperties, 2718 **{ # type: ignore 2719 "expressions": self._parse_wrapped_properties(), 2720 "with": with_, 2721 }, 2722 ) 2723 2724 def _parse_row_format( 2725 self, match_row: bool = False 2726 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2727 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2728 return None 2729 2730 if self._match_text_seq("SERDE"): 2731 this = self._parse_string() 2732 2733 serde_properties = self._parse_serde_properties() 2734 2735 return self.expression( 2736 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2737 ) 2738 2739 self._match_text_seq("DELIMITED") 2740 2741 kwargs = {} 2742 2743 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2744 kwargs["fields"] = self._parse_string() 2745 if self._match_text_seq("ESCAPED", "BY"): 2746 kwargs["escaped"] = self._parse_string() 2747 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2748 kwargs["collection_items"] = self._parse_string() 2749 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2750 kwargs["map_keys"] = self._parse_string() 2751 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2752 kwargs["lines"] = self._parse_string() 2753 if self._match_text_seq("NULL", "DEFINED", "AS"): 2754 kwargs["null"] = self._parse_string() 2755 2756 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2757 2758 def _parse_load(self) -> exp.LoadData | exp.Command: 2759 if self._match_text_seq("DATA"): 2760 local = self._match_text_seq("LOCAL") 2761 self._match_text_seq("INPATH") 2762 inpath = self._parse_string() 2763 overwrite = self._match(TokenType.OVERWRITE) 2764 self._match_pair(TokenType.INTO, TokenType.TABLE) 2765 2766 return self.expression( 2767 exp.LoadData, 2768 this=self._parse_table(schema=True), 2769 local=local, 2770 overwrite=overwrite, 2771 inpath=inpath, 2772 partition=self._parse_partition(), 2773 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2774 serde=self._match_text_seq("SERDE") and self._parse_string(), 2775 ) 2776 return self._parse_as_command(self._prev) 2777 2778 def _parse_delete(self) -> exp.Delete: 2779 # This handles MySQL's "Multiple-Table Syntax" 2780 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2781 tables = None 2782 comments = self._prev_comments 2783 if not self._match(TokenType.FROM, advance=False): 2784 tables = self._parse_csv(self._parse_table) or None 2785 2786 returning = self._parse_returning() 2787 2788 return self.expression( 2789 exp.Delete, 2790 comments=comments, 2791 tables=tables, 2792 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2793 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2794 where=self._parse_where(), 2795 returning=returning or self._parse_returning(), 2796 limit=self._parse_limit(), 2797 ) 2798 2799 def _parse_update(self) -> exp.Update: 2800 comments = self._prev_comments 2801 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2802 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2803 returning = self._parse_returning() 2804 return self.expression( 2805 exp.Update, 2806 comments=comments, 2807 **{ # type: ignore 2808 "this": this, 2809 "expressions": expressions, 2810 "from": self._parse_from(joins=True), 2811 "where": self._parse_where(), 2812 "returning": returning or self._parse_returning(), 2813 "order": self._parse_order(), 2814 "limit": self._parse_limit(), 2815 }, 2816 ) 2817 2818 def _parse_uncache(self) -> exp.Uncache: 2819 if not self._match(TokenType.TABLE): 2820 self.raise_error("Expecting TABLE after UNCACHE") 2821 2822 return self.expression( 2823 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2824 ) 2825 2826 def _parse_cache(self) -> exp.Cache: 2827 lazy = self._match_text_seq("LAZY") 2828 self._match(TokenType.TABLE) 2829 table = self._parse_table(schema=True) 2830 2831 options = [] 2832 if self._match_text_seq("OPTIONS"): 2833 self._match_l_paren() 2834 k = self._parse_string() 2835 self._match(TokenType.EQ) 2836 v = self._parse_string() 2837 options = [k, v] 2838 self._match_r_paren() 2839 2840 self._match(TokenType.ALIAS) 2841 return self.expression( 2842 exp.Cache, 2843 this=table, 2844 lazy=lazy, 2845 options=options, 2846 expression=self._parse_select(nested=True), 2847 ) 2848 2849 def _parse_partition(self) -> t.Optional[exp.Partition]: 2850 if not self._match(TokenType.PARTITION): 2851 return None 2852 2853 return self.expression( 2854 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2855 ) 2856 2857 def _parse_value(self) -> t.Optional[exp.Tuple]: 2858 if self._match(TokenType.L_PAREN): 2859 expressions = self._parse_csv(self._parse_expression) 2860 self._match_r_paren() 2861 return self.expression(exp.Tuple, expressions=expressions) 2862 2863 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2864 expression = self._parse_expression() 2865 if expression: 2866 return self.expression(exp.Tuple, expressions=[expression]) 2867 return None 2868 2869 def _parse_projections(self) -> t.List[exp.Expression]: 2870 return self._parse_expressions() 2871 2872 def _parse_select( 2873 self, 2874 nested: bool = False, 2875 table: bool = False, 2876 parse_subquery_alias: bool = True, 2877 parse_set_operation: bool = True, 2878 ) -> t.Optional[exp.Expression]: 2879 cte = self._parse_with() 2880 2881 if cte: 2882 this = self._parse_statement() 2883 2884 if not this: 2885 self.raise_error("Failed to parse any statement following CTE") 2886 return cte 2887 2888 if "with" in this.arg_types: 2889 this.set("with", cte) 2890 else: 2891 self.raise_error(f"{this.key} does not support CTE") 2892 this = cte 2893 2894 return this 2895 2896 # duckdb supports leading with FROM x 2897 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2898 2899 if self._match(TokenType.SELECT): 2900 comments = self._prev_comments 2901 2902 hint = self._parse_hint() 2903 2904 if self._next and not self._next.token_type == TokenType.DOT: 2905 all_ = self._match(TokenType.ALL) 2906 distinct = self._match_set(self.DISTINCT_TOKENS) 2907 else: 2908 all_, distinct = None, None 2909 2910 kind = ( 2911 self._match(TokenType.ALIAS) 2912 and self._match_texts(("STRUCT", "VALUE")) 2913 and self._prev.text.upper() 2914 ) 2915 2916 if distinct: 2917 distinct = self.expression( 2918 exp.Distinct, 2919 on=self._parse_value() if self._match(TokenType.ON) else None, 2920 ) 2921 2922 if all_ and distinct: 2923 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2924 2925 limit = self._parse_limit(top=True) 2926 projections = self._parse_projections() 2927 2928 this = self.expression( 2929 exp.Select, 2930 kind=kind, 2931 hint=hint, 2932 distinct=distinct, 2933 expressions=projections, 2934 limit=limit, 2935 ) 2936 this.comments = comments 2937 2938 into = self._parse_into() 2939 if into: 2940 this.set("into", into) 2941 2942 if not from_: 2943 from_ = self._parse_from() 2944 2945 if from_: 2946 this.set("from", from_) 2947 2948 this = self._parse_query_modifiers(this) 2949 elif (table or nested) and self._match(TokenType.L_PAREN): 2950 if self._match(TokenType.PIVOT): 2951 this = self._parse_simplified_pivot() 2952 elif self._match(TokenType.FROM): 2953 this = exp.select("*").from_( 2954 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2955 ) 2956 else: 2957 this = ( 2958 self._parse_table() 2959 if table 2960 else self._parse_select(nested=True, parse_set_operation=False) 2961 ) 2962 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2963 2964 self._match_r_paren() 2965 2966 # We return early here so that the UNION isn't attached to the subquery by the 2967 # following call to _parse_set_operations, but instead becomes the parent node 2968 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2969 elif self._match(TokenType.VALUES, advance=False): 2970 this = self._parse_derived_table_values() 2971 elif from_: 2972 this = exp.select("*").from_(from_.this, copy=False) 2973 elif self._match(TokenType.SUMMARIZE): 2974 table = self._match(TokenType.TABLE) 2975 this = self._parse_select() or self._parse_string() or self._parse_table() 2976 return self.expression(exp.Summarize, this=this, table=table) 2977 elif self._match(TokenType.DESCRIBE): 2978 this = self._parse_describe() 2979 elif self._match_text_seq("STREAM"): 2980 this = self.expression(exp.Stream, this=self._parse_function()) 2981 else: 2982 this = None 2983 2984 return self._parse_set_operations(this) if parse_set_operation else this 2985 2986 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2987 if not skip_with_token and not self._match(TokenType.WITH): 2988 return None 2989 2990 comments = self._prev_comments 2991 recursive = self._match(TokenType.RECURSIVE) 2992 2993 expressions = [] 2994 while True: 2995 expressions.append(self._parse_cte()) 2996 2997 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2998 break 2999 else: 3000 self._match(TokenType.WITH) 3001 3002 return self.expression( 3003 exp.With, comments=comments, expressions=expressions, recursive=recursive 3004 ) 3005 3006 def _parse_cte(self) -> exp.CTE: 3007 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3008 if not alias or not alias.this: 3009 self.raise_error("Expected CTE to have alias") 3010 3011 self._match(TokenType.ALIAS) 3012 comments = self._prev_comments 3013 3014 if self._match_text_seq("NOT", "MATERIALIZED"): 3015 materialized = False 3016 elif self._match_text_seq("MATERIALIZED"): 3017 materialized = True 3018 else: 3019 materialized = None 3020 3021 return self.expression( 3022 exp.CTE, 3023 this=self._parse_wrapped(self._parse_statement), 3024 alias=alias, 3025 materialized=materialized, 3026 comments=comments, 3027 ) 3028 3029 def _parse_table_alias( 3030 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3031 ) -> t.Optional[exp.TableAlias]: 3032 any_token = self._match(TokenType.ALIAS) 3033 alias = ( 3034 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3035 or self._parse_string_as_identifier() 3036 ) 3037 3038 index = self._index 3039 if self._match(TokenType.L_PAREN): 3040 columns = self._parse_csv(self._parse_function_parameter) 3041 self._match_r_paren() if columns else self._retreat(index) 3042 else: 3043 columns = None 3044 3045 if not alias and not columns: 3046 return None 3047 3048 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3049 3050 # We bubble up comments from the Identifier to the TableAlias 3051 if isinstance(alias, exp.Identifier): 3052 table_alias.add_comments(alias.pop_comments()) 3053 3054 return table_alias 3055 3056 def _parse_subquery( 3057 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3058 ) -> t.Optional[exp.Subquery]: 3059 if not this: 3060 return None 3061 3062 return self.expression( 3063 exp.Subquery, 3064 this=this, 3065 pivots=self._parse_pivots(), 3066 alias=self._parse_table_alias() if parse_alias else None, 3067 sample=self._parse_table_sample(), 3068 ) 3069 3070 def _implicit_unnests_to_explicit(self, this: E) -> E: 3071 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3072 3073 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3074 for i, join in enumerate(this.args.get("joins") or []): 3075 table = join.this 3076 normalized_table = table.copy() 3077 normalized_table.meta["maybe_column"] = True 3078 normalized_table = _norm(normalized_table, dialect=self.dialect) 3079 3080 if isinstance(table, exp.Table) and not join.args.get("on"): 3081 if normalized_table.parts[0].name in refs: 3082 table_as_column = table.to_column() 3083 unnest = exp.Unnest(expressions=[table_as_column]) 3084 3085 # Table.to_column creates a parent Alias node that we want to convert to 3086 # a TableAlias and attach to the Unnest, so it matches the parser's output 3087 if isinstance(table.args.get("alias"), exp.TableAlias): 3088 table_as_column.replace(table_as_column.this) 3089 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3090 3091 table.replace(unnest) 3092 3093 refs.add(normalized_table.alias_or_name) 3094 3095 return this 3096 3097 def _parse_query_modifiers( 3098 self, this: t.Optional[exp.Expression] 3099 ) -> t.Optional[exp.Expression]: 3100 if isinstance(this, (exp.Query, exp.Table)): 3101 for join in self._parse_joins(): 3102 this.append("joins", join) 3103 for lateral in iter(self._parse_lateral, None): 3104 this.append("laterals", lateral) 3105 3106 while True: 3107 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3108 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3109 key, expression = parser(self) 3110 3111 if expression: 3112 this.set(key, expression) 3113 if key == "limit": 3114 offset = expression.args.pop("offset", None) 3115 3116 if offset: 3117 offset = exp.Offset(expression=offset) 3118 this.set("offset", offset) 3119 3120 limit_by_expressions = expression.expressions 3121 expression.set("expressions", None) 3122 offset.set("expressions", limit_by_expressions) 3123 continue 3124 break 3125 3126 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3127 this = self._implicit_unnests_to_explicit(this) 3128 3129 return this 3130 3131 def _parse_hint(self) -> t.Optional[exp.Hint]: 3132 if self._match(TokenType.HINT): 3133 hints = [] 3134 for hint in iter( 3135 lambda: self._parse_csv( 3136 lambda: self._parse_function() or self._parse_var(upper=True) 3137 ), 3138 [], 3139 ): 3140 hints.extend(hint) 3141 3142 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3143 self.raise_error("Expected */ after HINT") 3144 3145 return self.expression(exp.Hint, expressions=hints) 3146 3147 return None 3148 3149 def _parse_into(self) -> t.Optional[exp.Into]: 3150 if not self._match(TokenType.INTO): 3151 return None 3152 3153 temp = self._match(TokenType.TEMPORARY) 3154 unlogged = self._match_text_seq("UNLOGGED") 3155 self._match(TokenType.TABLE) 3156 3157 return self.expression( 3158 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3159 ) 3160 3161 def _parse_from( 3162 self, joins: bool = False, skip_from_token: bool = False 3163 ) -> t.Optional[exp.From]: 3164 if not skip_from_token and not self._match(TokenType.FROM): 3165 return None 3166 3167 return self.expression( 3168 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3169 ) 3170 3171 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3172 return self.expression( 3173 exp.MatchRecognizeMeasure, 3174 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3175 this=self._parse_expression(), 3176 ) 3177 3178 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3179 if not self._match(TokenType.MATCH_RECOGNIZE): 3180 return None 3181 3182 self._match_l_paren() 3183 3184 partition = self._parse_partition_by() 3185 order = self._parse_order() 3186 3187 measures = ( 3188 self._parse_csv(self._parse_match_recognize_measure) 3189 if self._match_text_seq("MEASURES") 3190 else None 3191 ) 3192 3193 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3194 rows = exp.var("ONE ROW PER MATCH") 3195 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3196 text = "ALL ROWS PER MATCH" 3197 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3198 text += " SHOW EMPTY MATCHES" 3199 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3200 text += " OMIT EMPTY MATCHES" 3201 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3202 text += " WITH UNMATCHED ROWS" 3203 rows = exp.var(text) 3204 else: 3205 rows = None 3206 3207 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3208 text = "AFTER MATCH SKIP" 3209 if self._match_text_seq("PAST", "LAST", "ROW"): 3210 text += " PAST LAST ROW" 3211 elif self._match_text_seq("TO", "NEXT", "ROW"): 3212 text += " TO NEXT ROW" 3213 elif self._match_text_seq("TO", "FIRST"): 3214 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3215 elif self._match_text_seq("TO", "LAST"): 3216 text += f" TO LAST {self._advance_any().text}" # type: ignore 3217 after = exp.var(text) 3218 else: 3219 after = None 3220 3221 if self._match_text_seq("PATTERN"): 3222 self._match_l_paren() 3223 3224 if not self._curr: 3225 self.raise_error("Expecting )", self._curr) 3226 3227 paren = 1 3228 start = self._curr 3229 3230 while self._curr and paren > 0: 3231 if self._curr.token_type == TokenType.L_PAREN: 3232 paren += 1 3233 if self._curr.token_type == TokenType.R_PAREN: 3234 paren -= 1 3235 3236 end = self._prev 3237 self._advance() 3238 3239 if paren > 0: 3240 self.raise_error("Expecting )", self._curr) 3241 3242 pattern = exp.var(self._find_sql(start, end)) 3243 else: 3244 pattern = None 3245 3246 define = ( 3247 self._parse_csv(self._parse_name_as_expression) 3248 if self._match_text_seq("DEFINE") 3249 else None 3250 ) 3251 3252 self._match_r_paren() 3253 3254 return self.expression( 3255 exp.MatchRecognize, 3256 partition_by=partition, 3257 order=order, 3258 measures=measures, 3259 rows=rows, 3260 after=after, 3261 pattern=pattern, 3262 define=define, 3263 alias=self._parse_table_alias(), 3264 ) 3265 3266 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3267 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3268 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3269 cross_apply = False 3270 3271 if cross_apply is not None: 3272 this = self._parse_select(table=True) 3273 view = None 3274 outer = None 3275 elif self._match(TokenType.LATERAL): 3276 this = self._parse_select(table=True) 3277 view = self._match(TokenType.VIEW) 3278 outer = self._match(TokenType.OUTER) 3279 else: 3280 return None 3281 3282 if not this: 3283 this = ( 3284 self._parse_unnest() 3285 or self._parse_function() 3286 or self._parse_id_var(any_token=False) 3287 ) 3288 3289 while self._match(TokenType.DOT): 3290 this = exp.Dot( 3291 this=this, 3292 expression=self._parse_function() or self._parse_id_var(any_token=False), 3293 ) 3294 3295 if view: 3296 table = self._parse_id_var(any_token=False) 3297 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3298 table_alias: t.Optional[exp.TableAlias] = self.expression( 3299 exp.TableAlias, this=table, columns=columns 3300 ) 3301 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3302 # We move the alias from the lateral's child node to the lateral itself 3303 table_alias = this.args["alias"].pop() 3304 else: 3305 table_alias = self._parse_table_alias() 3306 3307 return self.expression( 3308 exp.Lateral, 3309 this=this, 3310 view=view, 3311 outer=outer, 3312 alias=table_alias, 3313 cross_apply=cross_apply, 3314 ) 3315 3316 def _parse_join_parts( 3317 self, 3318 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3319 return ( 3320 self._match_set(self.JOIN_METHODS) and self._prev, 3321 self._match_set(self.JOIN_SIDES) and self._prev, 3322 self._match_set(self.JOIN_KINDS) and self._prev, 3323 ) 3324 3325 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3326 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3327 this = self._parse_column() 3328 if isinstance(this, exp.Column): 3329 return this.this 3330 return this 3331 3332 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3333 3334 def _parse_join( 3335 self, skip_join_token: bool = False, parse_bracket: bool = False 3336 ) -> t.Optional[exp.Join]: 3337 if self._match(TokenType.COMMA): 3338 return self.expression(exp.Join, this=self._parse_table()) 3339 3340 index = self._index 3341 method, side, kind = self._parse_join_parts() 3342 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3343 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3344 3345 if not skip_join_token and not join: 3346 self._retreat(index) 3347 kind = None 3348 method = None 3349 side = None 3350 3351 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3352 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3353 3354 if not skip_join_token and not join and not outer_apply and not cross_apply: 3355 return None 3356 3357 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3358 3359 if method: 3360 kwargs["method"] = method.text 3361 if side: 3362 kwargs["side"] = side.text 3363 if kind: 3364 kwargs["kind"] = kind.text 3365 if hint: 3366 kwargs["hint"] = hint 3367 3368 if self._match(TokenType.MATCH_CONDITION): 3369 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3370 3371 if self._match(TokenType.ON): 3372 kwargs["on"] = self._parse_assignment() 3373 elif self._match(TokenType.USING): 3374 kwargs["using"] = self._parse_using_identifiers() 3375 elif ( 3376 not (outer_apply or cross_apply) 3377 and not isinstance(kwargs["this"], exp.Unnest) 3378 and not (kind and kind.token_type == TokenType.CROSS) 3379 ): 3380 index = self._index 3381 joins: t.Optional[list] = list(self._parse_joins()) 3382 3383 if joins and self._match(TokenType.ON): 3384 kwargs["on"] = self._parse_assignment() 3385 elif joins and self._match(TokenType.USING): 3386 kwargs["using"] = self._parse_using_identifiers() 3387 else: 3388 joins = None 3389 self._retreat(index) 3390 3391 kwargs["this"].set("joins", joins if joins else None) 3392 3393 comments = [c for token in (method, side, kind) if token for c in token.comments] 3394 return self.expression(exp.Join, comments=comments, **kwargs) 3395 3396 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3397 this = self._parse_assignment() 3398 3399 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3400 return this 3401 3402 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3403 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3404 3405 return this 3406 3407 def _parse_index_params(self) -> exp.IndexParameters: 3408 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3409 3410 if self._match(TokenType.L_PAREN, advance=False): 3411 columns = self._parse_wrapped_csv(self._parse_with_operator) 3412 else: 3413 columns = None 3414 3415 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3416 partition_by = self._parse_partition_by() 3417 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3418 tablespace = ( 3419 self._parse_var(any_token=True) 3420 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3421 else None 3422 ) 3423 where = self._parse_where() 3424 3425 on = self._parse_field() if self._match(TokenType.ON) else None 3426 3427 return self.expression( 3428 exp.IndexParameters, 3429 using=using, 3430 columns=columns, 3431 include=include, 3432 partition_by=partition_by, 3433 where=where, 3434 with_storage=with_storage, 3435 tablespace=tablespace, 3436 on=on, 3437 ) 3438 3439 def _parse_index( 3440 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3441 ) -> t.Optional[exp.Index]: 3442 if index or anonymous: 3443 unique = None 3444 primary = None 3445 amp = None 3446 3447 self._match(TokenType.ON) 3448 self._match(TokenType.TABLE) # hive 3449 table = self._parse_table_parts(schema=True) 3450 else: 3451 unique = self._match(TokenType.UNIQUE) 3452 primary = self._match_text_seq("PRIMARY") 3453 amp = self._match_text_seq("AMP") 3454 3455 if not self._match(TokenType.INDEX): 3456 return None 3457 3458 index = self._parse_id_var() 3459 table = None 3460 3461 params = self._parse_index_params() 3462 3463 return self.expression( 3464 exp.Index, 3465 this=index, 3466 table=table, 3467 unique=unique, 3468 primary=primary, 3469 amp=amp, 3470 params=params, 3471 ) 3472 3473 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3474 hints: t.List[exp.Expression] = [] 3475 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3476 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3477 hints.append( 3478 self.expression( 3479 exp.WithTableHint, 3480 expressions=self._parse_csv( 3481 lambda: self._parse_function() or self._parse_var(any_token=True) 3482 ), 3483 ) 3484 ) 3485 self._match_r_paren() 3486 else: 3487 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3488 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3489 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3490 3491 self._match_set((TokenType.INDEX, TokenType.KEY)) 3492 if self._match(TokenType.FOR): 3493 hint.set("target", self._advance_any() and self._prev.text.upper()) 3494 3495 hint.set("expressions", self._parse_wrapped_id_vars()) 3496 hints.append(hint) 3497 3498 return hints or None 3499 3500 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3501 return ( 3502 (not schema and self._parse_function(optional_parens=False)) 3503 or self._parse_id_var(any_token=False) 3504 or self._parse_string_as_identifier() 3505 or self._parse_placeholder() 3506 ) 3507 3508 def _parse_table_parts( 3509 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3510 ) -> exp.Table: 3511 catalog = None 3512 db = None 3513 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3514 3515 while self._match(TokenType.DOT): 3516 if catalog: 3517 # This allows nesting the table in arbitrarily many dot expressions if needed 3518 table = self.expression( 3519 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3520 ) 3521 else: 3522 catalog = db 3523 db = table 3524 # "" used for tsql FROM a..b case 3525 table = self._parse_table_part(schema=schema) or "" 3526 3527 if ( 3528 wildcard 3529 and self._is_connected() 3530 and (isinstance(table, exp.Identifier) or not table) 3531 and self._match(TokenType.STAR) 3532 ): 3533 if isinstance(table, exp.Identifier): 3534 table.args["this"] += "*" 3535 else: 3536 table = exp.Identifier(this="*") 3537 3538 # We bubble up comments from the Identifier to the Table 3539 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3540 3541 if is_db_reference: 3542 catalog = db 3543 db = table 3544 table = None 3545 3546 if not table and not is_db_reference: 3547 self.raise_error(f"Expected table name but got {self._curr}") 3548 if not db and is_db_reference: 3549 self.raise_error(f"Expected database name but got {self._curr}") 3550 3551 table = self.expression( 3552 exp.Table, 3553 comments=comments, 3554 this=table, 3555 db=db, 3556 catalog=catalog, 3557 ) 3558 3559 changes = self._parse_changes() 3560 if changes: 3561 table.set("changes", changes) 3562 3563 at_before = self._parse_historical_data() 3564 if at_before: 3565 table.set("when", at_before) 3566 3567 pivots = self._parse_pivots() 3568 if pivots: 3569 table.set("pivots", pivots) 3570 3571 return table 3572 3573 def _parse_table( 3574 self, 3575 schema: bool = False, 3576 joins: bool = False, 3577 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3578 parse_bracket: bool = False, 3579 is_db_reference: bool = False, 3580 parse_partition: bool = False, 3581 ) -> t.Optional[exp.Expression]: 3582 lateral = self._parse_lateral() 3583 if lateral: 3584 return lateral 3585 3586 unnest = self._parse_unnest() 3587 if unnest: 3588 return unnest 3589 3590 values = self._parse_derived_table_values() 3591 if values: 3592 return values 3593 3594 subquery = self._parse_select(table=True) 3595 if subquery: 3596 if not subquery.args.get("pivots"): 3597 subquery.set("pivots", self._parse_pivots()) 3598 return subquery 3599 3600 bracket = parse_bracket and self._parse_bracket(None) 3601 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3602 3603 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3604 self._parse_table 3605 ) 3606 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3607 3608 only = self._match(TokenType.ONLY) 3609 3610 this = t.cast( 3611 exp.Expression, 3612 bracket 3613 or rows_from 3614 or self._parse_bracket( 3615 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3616 ), 3617 ) 3618 3619 if only: 3620 this.set("only", only) 3621 3622 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3623 self._match_text_seq("*") 3624 3625 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3626 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3627 this.set("partition", self._parse_partition()) 3628 3629 if schema: 3630 return self._parse_schema(this=this) 3631 3632 version = self._parse_version() 3633 3634 if version: 3635 this.set("version", version) 3636 3637 if self.dialect.ALIAS_POST_TABLESAMPLE: 3638 this.set("sample", self._parse_table_sample()) 3639 3640 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3641 if alias: 3642 this.set("alias", alias) 3643 3644 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3645 return self.expression( 3646 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3647 ) 3648 3649 this.set("hints", self._parse_table_hints()) 3650 3651 if not this.args.get("pivots"): 3652 this.set("pivots", self._parse_pivots()) 3653 3654 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3655 this.set("sample", self._parse_table_sample()) 3656 3657 if joins: 3658 for join in self._parse_joins(): 3659 this.append("joins", join) 3660 3661 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3662 this.set("ordinality", True) 3663 this.set("alias", self._parse_table_alias()) 3664 3665 return this 3666 3667 def _parse_version(self) -> t.Optional[exp.Version]: 3668 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3669 this = "TIMESTAMP" 3670 elif self._match(TokenType.VERSION_SNAPSHOT): 3671 this = "VERSION" 3672 else: 3673 return None 3674 3675 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3676 kind = self._prev.text.upper() 3677 start = self._parse_bitwise() 3678 self._match_texts(("TO", "AND")) 3679 end = self._parse_bitwise() 3680 expression: t.Optional[exp.Expression] = self.expression( 3681 exp.Tuple, expressions=[start, end] 3682 ) 3683 elif self._match_text_seq("CONTAINED", "IN"): 3684 kind = "CONTAINED IN" 3685 expression = self.expression( 3686 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3687 ) 3688 elif self._match(TokenType.ALL): 3689 kind = "ALL" 3690 expression = None 3691 else: 3692 self._match_text_seq("AS", "OF") 3693 kind = "AS OF" 3694 expression = self._parse_type() 3695 3696 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3697 3698 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3699 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3700 index = self._index 3701 historical_data = None 3702 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3703 this = self._prev.text.upper() 3704 kind = ( 3705 self._match(TokenType.L_PAREN) 3706 and self._match_texts(self.HISTORICAL_DATA_KIND) 3707 and self._prev.text.upper() 3708 ) 3709 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3710 3711 if expression: 3712 self._match_r_paren() 3713 historical_data = self.expression( 3714 exp.HistoricalData, this=this, kind=kind, expression=expression 3715 ) 3716 else: 3717 self._retreat(index) 3718 3719 return historical_data 3720 3721 def _parse_changes(self) -> t.Optional[exp.Changes]: 3722 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3723 return None 3724 3725 information = self._parse_var(any_token=True) 3726 self._match_r_paren() 3727 3728 return self.expression( 3729 exp.Changes, 3730 information=information, 3731 at_before=self._parse_historical_data(), 3732 end=self._parse_historical_data(), 3733 ) 3734 3735 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3736 if not self._match(TokenType.UNNEST): 3737 return None 3738 3739 expressions = self._parse_wrapped_csv(self._parse_equality) 3740 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3741 3742 alias = self._parse_table_alias() if with_alias else None 3743 3744 if alias: 3745 if self.dialect.UNNEST_COLUMN_ONLY: 3746 if alias.args.get("columns"): 3747 self.raise_error("Unexpected extra column alias in unnest.") 3748 3749 alias.set("columns", [alias.this]) 3750 alias.set("this", None) 3751 3752 columns = alias.args.get("columns") or [] 3753 if offset and len(expressions) < len(columns): 3754 offset = columns.pop() 3755 3756 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3757 self._match(TokenType.ALIAS) 3758 offset = self._parse_id_var( 3759 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3760 ) or exp.to_identifier("offset") 3761 3762 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3763 3764 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3765 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3766 if not is_derived and not ( 3767 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3768 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3769 ): 3770 return None 3771 3772 expressions = self._parse_csv(self._parse_value) 3773 alias = self._parse_table_alias() 3774 3775 if is_derived: 3776 self._match_r_paren() 3777 3778 return self.expression( 3779 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3780 ) 3781 3782 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3783 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3784 as_modifier and self._match_text_seq("USING", "SAMPLE") 3785 ): 3786 return None 3787 3788 bucket_numerator = None 3789 bucket_denominator = None 3790 bucket_field = None 3791 percent = None 3792 size = None 3793 seed = None 3794 3795 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3796 matched_l_paren = self._match(TokenType.L_PAREN) 3797 3798 if self.TABLESAMPLE_CSV: 3799 num = None 3800 expressions = self._parse_csv(self._parse_primary) 3801 else: 3802 expressions = None 3803 num = ( 3804 self._parse_factor() 3805 if self._match(TokenType.NUMBER, advance=False) 3806 else self._parse_primary() or self._parse_placeholder() 3807 ) 3808 3809 if self._match_text_seq("BUCKET"): 3810 bucket_numerator = self._parse_number() 3811 self._match_text_seq("OUT", "OF") 3812 bucket_denominator = bucket_denominator = self._parse_number() 3813 self._match(TokenType.ON) 3814 bucket_field = self._parse_field() 3815 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3816 percent = num 3817 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3818 size = num 3819 else: 3820 percent = num 3821 3822 if matched_l_paren: 3823 self._match_r_paren() 3824 3825 if self._match(TokenType.L_PAREN): 3826 method = self._parse_var(upper=True) 3827 seed = self._match(TokenType.COMMA) and self._parse_number() 3828 self._match_r_paren() 3829 elif self._match_texts(("SEED", "REPEATABLE")): 3830 seed = self._parse_wrapped(self._parse_number) 3831 3832 if not method and self.DEFAULT_SAMPLING_METHOD: 3833 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3834 3835 return self.expression( 3836 exp.TableSample, 3837 expressions=expressions, 3838 method=method, 3839 bucket_numerator=bucket_numerator, 3840 bucket_denominator=bucket_denominator, 3841 bucket_field=bucket_field, 3842 percent=percent, 3843 size=size, 3844 seed=seed, 3845 ) 3846 3847 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3848 return list(iter(self._parse_pivot, None)) or None 3849 3850 def _parse_joins(self) -> t.Iterator[exp.Join]: 3851 return iter(self._parse_join, None) 3852 3853 # https://duckdb.org/docs/sql/statements/pivot 3854 def _parse_simplified_pivot(self) -> exp.Pivot: 3855 def _parse_on() -> t.Optional[exp.Expression]: 3856 this = self._parse_bitwise() 3857 return self._parse_in(this) if self._match(TokenType.IN) else this 3858 3859 this = self._parse_table() 3860 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3861 using = self._match(TokenType.USING) and self._parse_csv( 3862 lambda: self._parse_alias(self._parse_function()) 3863 ) 3864 group = self._parse_group() 3865 return self.expression( 3866 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3867 ) 3868 3869 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3870 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3871 this = self._parse_select_or_expression() 3872 3873 self._match(TokenType.ALIAS) 3874 alias = self._parse_bitwise() 3875 if alias: 3876 if isinstance(alias, exp.Column) and not alias.db: 3877 alias = alias.this 3878 return self.expression(exp.PivotAlias, this=this, alias=alias) 3879 3880 return this 3881 3882 value = self._parse_column() 3883 3884 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3885 self.raise_error("Expecting IN (") 3886 3887 if self._match(TokenType.ANY): 3888 expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order()) 3889 else: 3890 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3891 expr = self.expression(exp.In, this=value, expressions=aliased_expressions) 3892 3893 self._match_r_paren() 3894 return expr 3895 3896 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3897 index = self._index 3898 include_nulls = None 3899 3900 if self._match(TokenType.PIVOT): 3901 unpivot = False 3902 elif self._match(TokenType.UNPIVOT): 3903 unpivot = True 3904 3905 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3906 if self._match_text_seq("INCLUDE", "NULLS"): 3907 include_nulls = True 3908 elif self._match_text_seq("EXCLUDE", "NULLS"): 3909 include_nulls = False 3910 else: 3911 return None 3912 3913 expressions = [] 3914 3915 if not self._match(TokenType.L_PAREN): 3916 self._retreat(index) 3917 return None 3918 3919 if unpivot: 3920 expressions = self._parse_csv(self._parse_column) 3921 else: 3922 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3923 3924 if not expressions: 3925 self.raise_error("Failed to parse PIVOT's aggregation list") 3926 3927 if not self._match(TokenType.FOR): 3928 self.raise_error("Expecting FOR") 3929 3930 field = self._parse_pivot_in() 3931 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3932 self._parse_bitwise 3933 ) 3934 3935 self._match_r_paren() 3936 3937 pivot = self.expression( 3938 exp.Pivot, 3939 expressions=expressions, 3940 field=field, 3941 unpivot=unpivot, 3942 include_nulls=include_nulls, 3943 default_on_null=default_on_null, 3944 ) 3945 3946 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3947 pivot.set("alias", self._parse_table_alias()) 3948 3949 if not unpivot: 3950 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3951 3952 columns: t.List[exp.Expression] = [] 3953 for fld in pivot.args["field"].expressions: 3954 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3955 for name in names: 3956 if self.PREFIXED_PIVOT_COLUMNS: 3957 name = f"{name}_{field_name}" if name else field_name 3958 else: 3959 name = f"{field_name}_{name}" if name else field_name 3960 3961 columns.append(exp.to_identifier(name)) 3962 3963 pivot.set("columns", columns) 3964 3965 return pivot 3966 3967 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3968 return [agg.alias for agg in aggregations] 3969 3970 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3971 if not skip_where_token and not self._match(TokenType.PREWHERE): 3972 return None 3973 3974 return self.expression( 3975 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3976 ) 3977 3978 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3979 if not skip_where_token and not self._match(TokenType.WHERE): 3980 return None 3981 3982 return self.expression( 3983 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3984 ) 3985 3986 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3987 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3988 return None 3989 3990 elements: t.Dict[str, t.Any] = defaultdict(list) 3991 3992 if self._match(TokenType.ALL): 3993 elements["all"] = True 3994 elif self._match(TokenType.DISTINCT): 3995 elements["all"] = False 3996 3997 while True: 3998 index = self._index 3999 4000 elements["expressions"].extend( 4001 self._parse_csv( 4002 lambda: None 4003 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4004 else self._parse_assignment() 4005 ) 4006 ) 4007 4008 before_with_index = self._index 4009 with_prefix = self._match(TokenType.WITH) 4010 4011 if self._match(TokenType.ROLLUP): 4012 elements["rollup"].append( 4013 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4014 ) 4015 elif self._match(TokenType.CUBE): 4016 elements["cube"].append( 4017 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4018 ) 4019 elif self._match(TokenType.GROUPING_SETS): 4020 elements["grouping_sets"].append( 4021 self.expression( 4022 exp.GroupingSets, 4023 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4024 ) 4025 ) 4026 elif self._match_text_seq("TOTALS"): 4027 elements["totals"] = True # type: ignore 4028 4029 if before_with_index <= self._index <= before_with_index + 1: 4030 self._retreat(before_with_index) 4031 break 4032 4033 if index == self._index: 4034 break 4035 4036 return self.expression(exp.Group, **elements) # type: ignore 4037 4038 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4039 return self.expression( 4040 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4041 ) 4042 4043 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4044 if self._match(TokenType.L_PAREN): 4045 grouping_set = self._parse_csv(self._parse_column) 4046 self._match_r_paren() 4047 return self.expression(exp.Tuple, expressions=grouping_set) 4048 4049 return self._parse_column() 4050 4051 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4052 if not skip_having_token and not self._match(TokenType.HAVING): 4053 return None 4054 return self.expression(exp.Having, this=self._parse_assignment()) 4055 4056 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4057 if not self._match(TokenType.QUALIFY): 4058 return None 4059 return self.expression(exp.Qualify, this=self._parse_assignment()) 4060 4061 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4062 if skip_start_token: 4063 start = None 4064 elif self._match(TokenType.START_WITH): 4065 start = self._parse_assignment() 4066 else: 4067 return None 4068 4069 self._match(TokenType.CONNECT_BY) 4070 nocycle = self._match_text_seq("NOCYCLE") 4071 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4072 exp.Prior, this=self._parse_bitwise() 4073 ) 4074 connect = self._parse_assignment() 4075 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4076 4077 if not start and self._match(TokenType.START_WITH): 4078 start = self._parse_assignment() 4079 4080 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4081 4082 def _parse_name_as_expression(self) -> exp.Alias: 4083 return self.expression( 4084 exp.Alias, 4085 alias=self._parse_id_var(any_token=True), 4086 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4087 ) 4088 4089 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4090 if self._match_text_seq("INTERPOLATE"): 4091 return self._parse_wrapped_csv(self._parse_name_as_expression) 4092 return None 4093 4094 def _parse_order( 4095 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4096 ) -> t.Optional[exp.Expression]: 4097 siblings = None 4098 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4099 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4100 return this 4101 4102 siblings = True 4103 4104 return self.expression( 4105 exp.Order, 4106 this=this, 4107 expressions=self._parse_csv(self._parse_ordered), 4108 siblings=siblings, 4109 ) 4110 4111 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4112 if not self._match(token): 4113 return None 4114 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4115 4116 def _parse_ordered( 4117 self, parse_method: t.Optional[t.Callable] = None 4118 ) -> t.Optional[exp.Ordered]: 4119 this = parse_method() if parse_method else self._parse_assignment() 4120 if not this: 4121 return None 4122 4123 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4124 this = exp.var("ALL") 4125 4126 asc = self._match(TokenType.ASC) 4127 desc = self._match(TokenType.DESC) or (asc and False) 4128 4129 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4130 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4131 4132 nulls_first = is_nulls_first or False 4133 explicitly_null_ordered = is_nulls_first or is_nulls_last 4134 4135 if ( 4136 not explicitly_null_ordered 4137 and ( 4138 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4139 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4140 ) 4141 and self.dialect.NULL_ORDERING != "nulls_are_last" 4142 ): 4143 nulls_first = True 4144 4145 if self._match_text_seq("WITH", "FILL"): 4146 with_fill = self.expression( 4147 exp.WithFill, 4148 **{ # type: ignore 4149 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4150 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4151 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4152 "interpolate": self._parse_interpolate(), 4153 }, 4154 ) 4155 else: 4156 with_fill = None 4157 4158 return self.expression( 4159 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4160 ) 4161 4162 def _parse_limit( 4163 self, 4164 this: t.Optional[exp.Expression] = None, 4165 top: bool = False, 4166 skip_limit_token: bool = False, 4167 ) -> t.Optional[exp.Expression]: 4168 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4169 comments = self._prev_comments 4170 if top: 4171 limit_paren = self._match(TokenType.L_PAREN) 4172 expression = self._parse_term() if limit_paren else self._parse_number() 4173 4174 if limit_paren: 4175 self._match_r_paren() 4176 else: 4177 expression = self._parse_term() 4178 4179 if self._match(TokenType.COMMA): 4180 offset = expression 4181 expression = self._parse_term() 4182 else: 4183 offset = None 4184 4185 limit_exp = self.expression( 4186 exp.Limit, 4187 this=this, 4188 expression=expression, 4189 offset=offset, 4190 comments=comments, 4191 expressions=self._parse_limit_by(), 4192 ) 4193 4194 return limit_exp 4195 4196 if self._match(TokenType.FETCH): 4197 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4198 direction = self._prev.text.upper() if direction else "FIRST" 4199 4200 count = self._parse_field(tokens=self.FETCH_TOKENS) 4201 percent = self._match(TokenType.PERCENT) 4202 4203 self._match_set((TokenType.ROW, TokenType.ROWS)) 4204 4205 only = self._match_text_seq("ONLY") 4206 with_ties = self._match_text_seq("WITH", "TIES") 4207 4208 if only and with_ties: 4209 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4210 4211 return self.expression( 4212 exp.Fetch, 4213 direction=direction, 4214 count=count, 4215 percent=percent, 4216 with_ties=with_ties, 4217 ) 4218 4219 return this 4220 4221 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4222 if not self._match(TokenType.OFFSET): 4223 return this 4224 4225 count = self._parse_term() 4226 self._match_set((TokenType.ROW, TokenType.ROWS)) 4227 4228 return self.expression( 4229 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4230 ) 4231 4232 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4233 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4234 4235 def _parse_locks(self) -> t.List[exp.Lock]: 4236 locks = [] 4237 while True: 4238 if self._match_text_seq("FOR", "UPDATE"): 4239 update = True 4240 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4241 "LOCK", "IN", "SHARE", "MODE" 4242 ): 4243 update = False 4244 else: 4245 break 4246 4247 expressions = None 4248 if self._match_text_seq("OF"): 4249 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4250 4251 wait: t.Optional[bool | exp.Expression] = None 4252 if self._match_text_seq("NOWAIT"): 4253 wait = True 4254 elif self._match_text_seq("WAIT"): 4255 wait = self._parse_primary() 4256 elif self._match_text_seq("SKIP", "LOCKED"): 4257 wait = False 4258 4259 locks.append( 4260 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4261 ) 4262 4263 return locks 4264 4265 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4266 while this and self._match_set(self.SET_OPERATIONS): 4267 token_type = self._prev.token_type 4268 4269 if token_type == TokenType.UNION: 4270 operation: t.Type[exp.SetOperation] = exp.Union 4271 elif token_type == TokenType.EXCEPT: 4272 operation = exp.Except 4273 else: 4274 operation = exp.Intersect 4275 4276 comments = self._prev.comments 4277 4278 if self._match(TokenType.DISTINCT): 4279 distinct: t.Optional[bool] = True 4280 elif self._match(TokenType.ALL): 4281 distinct = False 4282 else: 4283 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4284 if distinct is None: 4285 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4286 4287 by_name = self._match_text_seq("BY", "NAME") 4288 expression = self._parse_select(nested=True, parse_set_operation=False) 4289 4290 this = self.expression( 4291 operation, 4292 comments=comments, 4293 this=this, 4294 distinct=distinct, 4295 by_name=by_name, 4296 expression=expression, 4297 ) 4298 4299 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4300 expression = this.expression 4301 4302 if expression: 4303 for arg in self.SET_OP_MODIFIERS: 4304 expr = expression.args.get(arg) 4305 if expr: 4306 this.set(arg, expr.pop()) 4307 4308 return this 4309 4310 def _parse_expression(self) -> t.Optional[exp.Expression]: 4311 return self._parse_alias(self._parse_assignment()) 4312 4313 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4314 this = self._parse_disjunction() 4315 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4316 # This allows us to parse <non-identifier token> := <expr> 4317 this = exp.column( 4318 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4319 ) 4320 4321 while self._match_set(self.ASSIGNMENT): 4322 this = self.expression( 4323 self.ASSIGNMENT[self._prev.token_type], 4324 this=this, 4325 comments=self._prev_comments, 4326 expression=self._parse_assignment(), 4327 ) 4328 4329 return this 4330 4331 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4332 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4333 4334 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4335 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4336 4337 def _parse_equality(self) -> t.Optional[exp.Expression]: 4338 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4339 4340 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4341 return self._parse_tokens(self._parse_range, self.COMPARISON) 4342 4343 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4344 this = this or self._parse_bitwise() 4345 negate = self._match(TokenType.NOT) 4346 4347 if self._match_set(self.RANGE_PARSERS): 4348 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4349 if not expression: 4350 return this 4351 4352 this = expression 4353 elif self._match(TokenType.ISNULL): 4354 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4355 4356 # Postgres supports ISNULL and NOTNULL for conditions. 4357 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4358 if self._match(TokenType.NOTNULL): 4359 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4360 this = self.expression(exp.Not, this=this) 4361 4362 if negate: 4363 this = self._negate_range(this) 4364 4365 if self._match(TokenType.IS): 4366 this = self._parse_is(this) 4367 4368 return this 4369 4370 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4371 if not this: 4372 return this 4373 4374 return self.expression(exp.Not, this=this) 4375 4376 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4377 index = self._index - 1 4378 negate = self._match(TokenType.NOT) 4379 4380 if self._match_text_seq("DISTINCT", "FROM"): 4381 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4382 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4383 4384 if self._match(TokenType.JSON): 4385 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4386 4387 if self._match_text_seq("WITH"): 4388 _with = True 4389 elif self._match_text_seq("WITHOUT"): 4390 _with = False 4391 else: 4392 _with = None 4393 4394 unique = self._match(TokenType.UNIQUE) 4395 self._match_text_seq("KEYS") 4396 expression: t.Optional[exp.Expression] = self.expression( 4397 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4398 ) 4399 else: 4400 expression = self._parse_primary() or self._parse_null() 4401 if not expression: 4402 self._retreat(index) 4403 return None 4404 4405 this = self.expression(exp.Is, this=this, expression=expression) 4406 return self.expression(exp.Not, this=this) if negate else this 4407 4408 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4409 unnest = self._parse_unnest(with_alias=False) 4410 if unnest: 4411 this = self.expression(exp.In, this=this, unnest=unnest) 4412 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4413 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4414 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4415 4416 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4417 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4418 else: 4419 this = self.expression(exp.In, this=this, expressions=expressions) 4420 4421 if matched_l_paren: 4422 self._match_r_paren(this) 4423 elif not self._match(TokenType.R_BRACKET, expression=this): 4424 self.raise_error("Expecting ]") 4425 else: 4426 this = self.expression(exp.In, this=this, field=self._parse_field()) 4427 4428 return this 4429 4430 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4431 low = self._parse_bitwise() 4432 self._match(TokenType.AND) 4433 high = self._parse_bitwise() 4434 return self.expression(exp.Between, this=this, low=low, high=high) 4435 4436 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4437 if not self._match(TokenType.ESCAPE): 4438 return this 4439 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4440 4441 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4442 index = self._index 4443 4444 if not self._match(TokenType.INTERVAL) and match_interval: 4445 return None 4446 4447 if self._match(TokenType.STRING, advance=False): 4448 this = self._parse_primary() 4449 else: 4450 this = self._parse_term() 4451 4452 if not this or ( 4453 isinstance(this, exp.Column) 4454 and not this.table 4455 and not this.this.quoted 4456 and this.name.upper() == "IS" 4457 ): 4458 self._retreat(index) 4459 return None 4460 4461 unit = self._parse_function() or ( 4462 not self._match(TokenType.ALIAS, advance=False) 4463 and self._parse_var(any_token=True, upper=True) 4464 ) 4465 4466 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4467 # each INTERVAL expression into this canonical form so it's easy to transpile 4468 if this and this.is_number: 4469 this = exp.Literal.string(this.to_py()) 4470 elif this and this.is_string: 4471 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4472 if len(parts) == 1: 4473 if unit: 4474 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4475 self._retreat(self._index - 1) 4476 4477 this = exp.Literal.string(parts[0][0]) 4478 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4479 4480 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4481 unit = self.expression( 4482 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4483 ) 4484 4485 interval = self.expression(exp.Interval, this=this, unit=unit) 4486 4487 index = self._index 4488 self._match(TokenType.PLUS) 4489 4490 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4491 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4492 return self.expression( 4493 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4494 ) 4495 4496 self._retreat(index) 4497 return interval 4498 4499 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4500 this = self._parse_term() 4501 4502 while True: 4503 if self._match_set(self.BITWISE): 4504 this = self.expression( 4505 self.BITWISE[self._prev.token_type], 4506 this=this, 4507 expression=self._parse_term(), 4508 ) 4509 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4510 this = self.expression( 4511 exp.DPipe, 4512 this=this, 4513 expression=self._parse_term(), 4514 safe=not self.dialect.STRICT_STRING_CONCAT, 4515 ) 4516 elif self._match(TokenType.DQMARK): 4517 this = self.expression( 4518 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4519 ) 4520 elif self._match_pair(TokenType.LT, TokenType.LT): 4521 this = self.expression( 4522 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4523 ) 4524 elif self._match_pair(TokenType.GT, TokenType.GT): 4525 this = self.expression( 4526 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4527 ) 4528 else: 4529 break 4530 4531 return this 4532 4533 def _parse_term(self) -> t.Optional[exp.Expression]: 4534 this = self._parse_factor() 4535 4536 while self._match_set(self.TERM): 4537 klass = self.TERM[self._prev.token_type] 4538 comments = self._prev_comments 4539 expression = self._parse_factor() 4540 4541 this = self.expression(klass, this=this, comments=comments, expression=expression) 4542 4543 if isinstance(this, exp.Collate): 4544 expr = this.expression 4545 4546 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4547 # fallback to Identifier / Var 4548 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4549 ident = expr.this 4550 if isinstance(ident, exp.Identifier): 4551 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4552 4553 return this 4554 4555 def _parse_factor(self) -> t.Optional[exp.Expression]: 4556 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4557 this = parse_method() 4558 4559 while self._match_set(self.FACTOR): 4560 klass = self.FACTOR[self._prev.token_type] 4561 comments = self._prev_comments 4562 expression = parse_method() 4563 4564 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4565 self._retreat(self._index - 1) 4566 return this 4567 4568 this = self.expression(klass, this=this, comments=comments, expression=expression) 4569 4570 if isinstance(this, exp.Div): 4571 this.args["typed"] = self.dialect.TYPED_DIVISION 4572 this.args["safe"] = self.dialect.SAFE_DIVISION 4573 4574 return this 4575 4576 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4577 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4578 4579 def _parse_unary(self) -> t.Optional[exp.Expression]: 4580 if self._match_set(self.UNARY_PARSERS): 4581 return self.UNARY_PARSERS[self._prev.token_type](self) 4582 return self._parse_at_time_zone(self._parse_type()) 4583 4584 def _parse_type( 4585 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4586 ) -> t.Optional[exp.Expression]: 4587 interval = parse_interval and self._parse_interval() 4588 if interval: 4589 return interval 4590 4591 index = self._index 4592 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4593 4594 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4595 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4596 if isinstance(data_type, exp.Cast): 4597 # This constructor can contain ops directly after it, for instance struct unnesting: 4598 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4599 return self._parse_column_ops(data_type) 4600 4601 if data_type: 4602 index2 = self._index 4603 this = self._parse_primary() 4604 4605 if isinstance(this, exp.Literal): 4606 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4607 if parser: 4608 return parser(self, this, data_type) 4609 4610 return self.expression(exp.Cast, this=this, to=data_type) 4611 4612 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4613 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4614 # 4615 # If the index difference here is greater than 1, that means the parser itself must have 4616 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4617 # 4618 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4619 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4620 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4621 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4622 # 4623 # In these cases, we don't really want to return the converted type, but instead retreat 4624 # and try to parse a Column or Identifier in the section below. 4625 if data_type.expressions and index2 - index > 1: 4626 self._retreat(index2) 4627 return self._parse_column_ops(data_type) 4628 4629 self._retreat(index) 4630 4631 if fallback_to_identifier: 4632 return self._parse_id_var() 4633 4634 this = self._parse_column() 4635 return this and self._parse_column_ops(this) 4636 4637 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4638 this = self._parse_type() 4639 if not this: 4640 return None 4641 4642 if isinstance(this, exp.Column) and not this.table: 4643 this = exp.var(this.name.upper()) 4644 4645 return self.expression( 4646 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4647 ) 4648 4649 def _parse_types( 4650 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4651 ) -> t.Optional[exp.Expression]: 4652 index = self._index 4653 4654 this: t.Optional[exp.Expression] = None 4655 prefix = self._match_text_seq("SYSUDTLIB", ".") 4656 4657 if not self._match_set(self.TYPE_TOKENS): 4658 identifier = allow_identifiers and self._parse_id_var( 4659 any_token=False, tokens=(TokenType.VAR,) 4660 ) 4661 if isinstance(identifier, exp.Identifier): 4662 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4663 4664 if len(tokens) != 1: 4665 self.raise_error("Unexpected identifier", self._prev) 4666 4667 if tokens[0].token_type in self.TYPE_TOKENS: 4668 self._prev = tokens[0] 4669 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4670 type_name = identifier.name 4671 4672 while self._match(TokenType.DOT): 4673 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4674 4675 this = exp.DataType.build(type_name, udt=True) 4676 else: 4677 self._retreat(self._index - 1) 4678 return None 4679 else: 4680 return None 4681 4682 type_token = self._prev.token_type 4683 4684 if type_token == TokenType.PSEUDO_TYPE: 4685 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4686 4687 if type_token == TokenType.OBJECT_IDENTIFIER: 4688 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4689 4690 # https://materialize.com/docs/sql/types/map/ 4691 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4692 key_type = self._parse_types( 4693 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4694 ) 4695 if not self._match(TokenType.FARROW): 4696 self._retreat(index) 4697 return None 4698 4699 value_type = self._parse_types( 4700 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4701 ) 4702 if not self._match(TokenType.R_BRACKET): 4703 self._retreat(index) 4704 return None 4705 4706 return exp.DataType( 4707 this=exp.DataType.Type.MAP, 4708 expressions=[key_type, value_type], 4709 nested=True, 4710 prefix=prefix, 4711 ) 4712 4713 nested = type_token in self.NESTED_TYPE_TOKENS 4714 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4715 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4716 expressions = None 4717 maybe_func = False 4718 4719 if self._match(TokenType.L_PAREN): 4720 if is_struct: 4721 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4722 elif nested: 4723 expressions = self._parse_csv( 4724 lambda: self._parse_types( 4725 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4726 ) 4727 ) 4728 elif type_token in self.ENUM_TYPE_TOKENS: 4729 expressions = self._parse_csv(self._parse_equality) 4730 elif is_aggregate: 4731 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4732 any_token=False, tokens=(TokenType.VAR,) 4733 ) 4734 if not func_or_ident or not self._match(TokenType.COMMA): 4735 return None 4736 expressions = self._parse_csv( 4737 lambda: self._parse_types( 4738 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4739 ) 4740 ) 4741 expressions.insert(0, func_or_ident) 4742 else: 4743 expressions = self._parse_csv(self._parse_type_size) 4744 4745 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4746 if type_token == TokenType.VECTOR and len(expressions) == 2: 4747 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4748 4749 if not expressions or not self._match(TokenType.R_PAREN): 4750 self._retreat(index) 4751 return None 4752 4753 maybe_func = True 4754 4755 values: t.Optional[t.List[exp.Expression]] = None 4756 4757 if nested and self._match(TokenType.LT): 4758 if is_struct: 4759 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4760 else: 4761 expressions = self._parse_csv( 4762 lambda: self._parse_types( 4763 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4764 ) 4765 ) 4766 4767 if not self._match(TokenType.GT): 4768 self.raise_error("Expecting >") 4769 4770 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4771 values = self._parse_csv(self._parse_assignment) 4772 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4773 4774 if type_token in self.TIMESTAMPS: 4775 if self._match_text_seq("WITH", "TIME", "ZONE"): 4776 maybe_func = False 4777 tz_type = ( 4778 exp.DataType.Type.TIMETZ 4779 if type_token in self.TIMES 4780 else exp.DataType.Type.TIMESTAMPTZ 4781 ) 4782 this = exp.DataType(this=tz_type, expressions=expressions) 4783 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4784 maybe_func = False 4785 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4786 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4787 maybe_func = False 4788 elif type_token == TokenType.INTERVAL: 4789 unit = self._parse_var(upper=True) 4790 if unit: 4791 if self._match_text_seq("TO"): 4792 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4793 4794 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4795 else: 4796 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4797 4798 if maybe_func and check_func: 4799 index2 = self._index 4800 peek = self._parse_string() 4801 4802 if not peek: 4803 self._retreat(index) 4804 return None 4805 4806 self._retreat(index2) 4807 4808 if not this: 4809 if self._match_text_seq("UNSIGNED"): 4810 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4811 if not unsigned_type_token: 4812 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4813 4814 type_token = unsigned_type_token or type_token 4815 4816 this = exp.DataType( 4817 this=exp.DataType.Type[type_token.value], 4818 expressions=expressions, 4819 nested=nested, 4820 prefix=prefix, 4821 ) 4822 4823 # Empty arrays/structs are allowed 4824 if values is not None: 4825 cls = exp.Struct if is_struct else exp.Array 4826 this = exp.cast(cls(expressions=values), this, copy=False) 4827 4828 elif expressions: 4829 this.set("expressions", expressions) 4830 4831 # https://materialize.com/docs/sql/types/list/#type-name 4832 while self._match(TokenType.LIST): 4833 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4834 4835 index = self._index 4836 4837 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4838 matched_array = self._match(TokenType.ARRAY) 4839 4840 while self._curr: 4841 datatype_token = self._prev.token_type 4842 matched_l_bracket = self._match(TokenType.L_BRACKET) 4843 if not matched_l_bracket and not matched_array: 4844 break 4845 4846 matched_array = False 4847 values = self._parse_csv(self._parse_assignment) or None 4848 if ( 4849 values 4850 and not schema 4851 and ( 4852 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4853 ) 4854 ): 4855 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4856 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4857 self._retreat(index) 4858 break 4859 4860 this = exp.DataType( 4861 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4862 ) 4863 self._match(TokenType.R_BRACKET) 4864 4865 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4866 converter = self.TYPE_CONVERTERS.get(this.this) 4867 if converter: 4868 this = converter(t.cast(exp.DataType, this)) 4869 4870 return this 4871 4872 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4873 index = self._index 4874 4875 if ( 4876 self._curr 4877 and self._next 4878 and self._curr.token_type in self.TYPE_TOKENS 4879 and self._next.token_type in self.TYPE_TOKENS 4880 ): 4881 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4882 # type token. Without this, the list will be parsed as a type and we'll eventually crash 4883 this = self._parse_id_var() 4884 else: 4885 this = ( 4886 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4887 or self._parse_id_var() 4888 ) 4889 4890 self._match(TokenType.COLON) 4891 4892 if ( 4893 type_required 4894 and not isinstance(this, exp.DataType) 4895 and not self._match_set(self.TYPE_TOKENS, advance=False) 4896 ): 4897 self._retreat(index) 4898 return self._parse_types() 4899 4900 return self._parse_column_def(this) 4901 4902 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4903 if not self._match_text_seq("AT", "TIME", "ZONE"): 4904 return this 4905 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4906 4907 def _parse_column(self) -> t.Optional[exp.Expression]: 4908 this = self._parse_column_reference() 4909 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4910 4911 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4912 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4913 4914 return column 4915 4916 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4917 this = self._parse_field() 4918 if ( 4919 not this 4920 and self._match(TokenType.VALUES, advance=False) 4921 and self.VALUES_FOLLOWED_BY_PAREN 4922 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4923 ): 4924 this = self._parse_id_var() 4925 4926 if isinstance(this, exp.Identifier): 4927 # We bubble up comments from the Identifier to the Column 4928 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4929 4930 return this 4931 4932 def _parse_colon_as_variant_extract( 4933 self, this: t.Optional[exp.Expression] 4934 ) -> t.Optional[exp.Expression]: 4935 casts = [] 4936 json_path = [] 4937 4938 while self._match(TokenType.COLON): 4939 start_index = self._index 4940 4941 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4942 path = self._parse_column_ops( 4943 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4944 ) 4945 4946 # The cast :: operator has a lower precedence than the extraction operator :, so 4947 # we rearrange the AST appropriately to avoid casting the JSON path 4948 while isinstance(path, exp.Cast): 4949 casts.append(path.to) 4950 path = path.this 4951 4952 if casts: 4953 dcolon_offset = next( 4954 i 4955 for i, t in enumerate(self._tokens[start_index:]) 4956 if t.token_type == TokenType.DCOLON 4957 ) 4958 end_token = self._tokens[start_index + dcolon_offset - 1] 4959 else: 4960 end_token = self._prev 4961 4962 if path: 4963 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4964 4965 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4966 # Databricks transforms it back to the colon/dot notation 4967 if json_path: 4968 this = self.expression( 4969 exp.JSONExtract, 4970 this=this, 4971 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4972 variant_extract=True, 4973 ) 4974 4975 while casts: 4976 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4977 4978 return this 4979 4980 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4981 return self._parse_types() 4982 4983 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4984 this = self._parse_bracket(this) 4985 4986 while self._match_set(self.COLUMN_OPERATORS): 4987 op_token = self._prev.token_type 4988 op = self.COLUMN_OPERATORS.get(op_token) 4989 4990 if op_token == TokenType.DCOLON: 4991 field = self._parse_dcolon() 4992 if not field: 4993 self.raise_error("Expected type") 4994 elif op and self._curr: 4995 field = self._parse_column_reference() 4996 else: 4997 field = self._parse_field(any_token=True, anonymous_func=True) 4998 4999 if isinstance(field, exp.Func) and this: 5000 # bigquery allows function calls like x.y.count(...) 5001 # SAFE.SUBSTR(...) 5002 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5003 this = exp.replace_tree( 5004 this, 5005 lambda n: ( 5006 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5007 if n.table 5008 else n.this 5009 ) 5010 if isinstance(n, exp.Column) 5011 else n, 5012 ) 5013 5014 if op: 5015 this = op(self, this, field) 5016 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5017 this = self.expression( 5018 exp.Column, 5019 this=field, 5020 table=this.this, 5021 db=this.args.get("table"), 5022 catalog=this.args.get("db"), 5023 ) 5024 else: 5025 this = self.expression(exp.Dot, this=this, expression=field) 5026 5027 this = self._parse_bracket(this) 5028 5029 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5030 5031 def _parse_primary(self) -> t.Optional[exp.Expression]: 5032 if self._match_set(self.PRIMARY_PARSERS): 5033 token_type = self._prev.token_type 5034 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5035 5036 if token_type == TokenType.STRING: 5037 expressions = [primary] 5038 while self._match(TokenType.STRING): 5039 expressions.append(exp.Literal.string(self._prev.text)) 5040 5041 if len(expressions) > 1: 5042 return self.expression(exp.Concat, expressions=expressions) 5043 5044 return primary 5045 5046 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5047 return exp.Literal.number(f"0.{self._prev.text}") 5048 5049 if self._match(TokenType.L_PAREN): 5050 comments = self._prev_comments 5051 query = self._parse_select() 5052 5053 if query: 5054 expressions = [query] 5055 else: 5056 expressions = self._parse_expressions() 5057 5058 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5059 5060 if not this and self._match(TokenType.R_PAREN, advance=False): 5061 this = self.expression(exp.Tuple) 5062 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5063 this = self._parse_subquery(this=this, parse_alias=False) 5064 elif isinstance(this, exp.Subquery): 5065 this = self._parse_subquery( 5066 this=self._parse_set_operations(this), parse_alias=False 5067 ) 5068 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5069 this = self.expression(exp.Tuple, expressions=expressions) 5070 else: 5071 this = self.expression(exp.Paren, this=this) 5072 5073 if this: 5074 this.add_comments(comments) 5075 5076 self._match_r_paren(expression=this) 5077 return this 5078 5079 return None 5080 5081 def _parse_field( 5082 self, 5083 any_token: bool = False, 5084 tokens: t.Optional[t.Collection[TokenType]] = None, 5085 anonymous_func: bool = False, 5086 ) -> t.Optional[exp.Expression]: 5087 if anonymous_func: 5088 field = ( 5089 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5090 or self._parse_primary() 5091 ) 5092 else: 5093 field = self._parse_primary() or self._parse_function( 5094 anonymous=anonymous_func, any_token=any_token 5095 ) 5096 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5097 5098 def _parse_function( 5099 self, 5100 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5101 anonymous: bool = False, 5102 optional_parens: bool = True, 5103 any_token: bool = False, 5104 ) -> t.Optional[exp.Expression]: 5105 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5106 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5107 fn_syntax = False 5108 if ( 5109 self._match(TokenType.L_BRACE, advance=False) 5110 and self._next 5111 and self._next.text.upper() == "FN" 5112 ): 5113 self._advance(2) 5114 fn_syntax = True 5115 5116 func = self._parse_function_call( 5117 functions=functions, 5118 anonymous=anonymous, 5119 optional_parens=optional_parens, 5120 any_token=any_token, 5121 ) 5122 5123 if fn_syntax: 5124 self._match(TokenType.R_BRACE) 5125 5126 return func 5127 5128 def _parse_function_call( 5129 self, 5130 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5131 anonymous: bool = False, 5132 optional_parens: bool = True, 5133 any_token: bool = False, 5134 ) -> t.Optional[exp.Expression]: 5135 if not self._curr: 5136 return None 5137 5138 comments = self._curr.comments 5139 token_type = self._curr.token_type 5140 this = self._curr.text 5141 upper = this.upper() 5142 5143 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5144 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5145 self._advance() 5146 return self._parse_window(parser(self)) 5147 5148 if not self._next or self._next.token_type != TokenType.L_PAREN: 5149 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5150 self._advance() 5151 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5152 5153 return None 5154 5155 if any_token: 5156 if token_type in self.RESERVED_TOKENS: 5157 return None 5158 elif token_type not in self.FUNC_TOKENS: 5159 return None 5160 5161 self._advance(2) 5162 5163 parser = self.FUNCTION_PARSERS.get(upper) 5164 if parser and not anonymous: 5165 this = parser(self) 5166 else: 5167 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5168 5169 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5170 this = self.expression(subquery_predicate, this=self._parse_select()) 5171 self._match_r_paren() 5172 return this 5173 5174 if functions is None: 5175 functions = self.FUNCTIONS 5176 5177 function = functions.get(upper) 5178 5179 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5180 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5181 5182 if alias: 5183 args = self._kv_to_prop_eq(args) 5184 5185 if function and not anonymous: 5186 if "dialect" in function.__code__.co_varnames: 5187 func = function(args, dialect=self.dialect) 5188 else: 5189 func = function(args) 5190 5191 func = self.validate_expression(func, args) 5192 if not self.dialect.NORMALIZE_FUNCTIONS: 5193 func.meta["name"] = this 5194 5195 this = func 5196 else: 5197 if token_type == TokenType.IDENTIFIER: 5198 this = exp.Identifier(this=this, quoted=True) 5199 this = self.expression(exp.Anonymous, this=this, expressions=args) 5200 5201 if isinstance(this, exp.Expression): 5202 this.add_comments(comments) 5203 5204 self._match_r_paren(this) 5205 return self._parse_window(this) 5206 5207 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5208 return expression 5209 5210 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5211 transformed = [] 5212 5213 for index, e in enumerate(expressions): 5214 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5215 if isinstance(e, exp.Alias): 5216 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5217 5218 if not isinstance(e, exp.PropertyEQ): 5219 e = self.expression( 5220 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5221 ) 5222 5223 if isinstance(e.this, exp.Column): 5224 e.this.replace(e.this.this) 5225 else: 5226 e = self._to_prop_eq(e, index) 5227 5228 transformed.append(e) 5229 5230 return transformed 5231 5232 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5233 return self._parse_column_def(self._parse_id_var()) 5234 5235 def _parse_user_defined_function( 5236 self, kind: t.Optional[TokenType] = None 5237 ) -> t.Optional[exp.Expression]: 5238 this = self._parse_id_var() 5239 5240 while self._match(TokenType.DOT): 5241 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5242 5243 if not self._match(TokenType.L_PAREN): 5244 return this 5245 5246 expressions = self._parse_csv(self._parse_function_parameter) 5247 self._match_r_paren() 5248 return self.expression( 5249 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5250 ) 5251 5252 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5253 literal = self._parse_primary() 5254 if literal: 5255 return self.expression(exp.Introducer, this=token.text, expression=literal) 5256 5257 return self.expression(exp.Identifier, this=token.text) 5258 5259 def _parse_session_parameter(self) -> exp.SessionParameter: 5260 kind = None 5261 this = self._parse_id_var() or self._parse_primary() 5262 5263 if this and self._match(TokenType.DOT): 5264 kind = this.name 5265 this = self._parse_var() or self._parse_primary() 5266 5267 return self.expression(exp.SessionParameter, this=this, kind=kind) 5268 5269 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5270 return self._parse_id_var() 5271 5272 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5273 index = self._index 5274 5275 if self._match(TokenType.L_PAREN): 5276 expressions = t.cast( 5277 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5278 ) 5279 5280 if not self._match(TokenType.R_PAREN): 5281 self._retreat(index) 5282 else: 5283 expressions = [self._parse_lambda_arg()] 5284 5285 if self._match_set(self.LAMBDAS): 5286 return self.LAMBDAS[self._prev.token_type](self, expressions) 5287 5288 self._retreat(index) 5289 5290 this: t.Optional[exp.Expression] 5291 5292 if self._match(TokenType.DISTINCT): 5293 this = self.expression( 5294 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5295 ) 5296 else: 5297 this = self._parse_select_or_expression(alias=alias) 5298 5299 return self._parse_limit( 5300 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5301 ) 5302 5303 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5304 index = self._index 5305 if not self._match(TokenType.L_PAREN): 5306 return this 5307 5308 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5309 # expr can be of both types 5310 if self._match_set(self.SELECT_START_TOKENS): 5311 self._retreat(index) 5312 return this 5313 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5314 self._match_r_paren() 5315 return self.expression(exp.Schema, this=this, expressions=args) 5316 5317 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5318 return self._parse_column_def(self._parse_field(any_token=True)) 5319 5320 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5321 # column defs are not really columns, they're identifiers 5322 if isinstance(this, exp.Column): 5323 this = this.this 5324 5325 kind = self._parse_types(schema=True) 5326 5327 if self._match_text_seq("FOR", "ORDINALITY"): 5328 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5329 5330 constraints: t.List[exp.Expression] = [] 5331 5332 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5333 ("ALIAS", "MATERIALIZED") 5334 ): 5335 persisted = self._prev.text.upper() == "MATERIALIZED" 5336 constraints.append( 5337 self.expression( 5338 exp.ComputedColumnConstraint, 5339 this=self._parse_assignment(), 5340 persisted=persisted or self._match_text_seq("PERSISTED"), 5341 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5342 ) 5343 ) 5344 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5345 self._match(TokenType.ALIAS) 5346 constraints.append( 5347 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 5348 ) 5349 5350 while True: 5351 constraint = self._parse_column_constraint() 5352 if not constraint: 5353 break 5354 constraints.append(constraint) 5355 5356 if not kind and not constraints: 5357 return this 5358 5359 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5360 5361 def _parse_auto_increment( 5362 self, 5363 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5364 start = None 5365 increment = None 5366 5367 if self._match(TokenType.L_PAREN, advance=False): 5368 args = self._parse_wrapped_csv(self._parse_bitwise) 5369 start = seq_get(args, 0) 5370 increment = seq_get(args, 1) 5371 elif self._match_text_seq("START"): 5372 start = self._parse_bitwise() 5373 self._match_text_seq("INCREMENT") 5374 increment = self._parse_bitwise() 5375 5376 if start and increment: 5377 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5378 5379 return exp.AutoIncrementColumnConstraint() 5380 5381 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5382 if not self._match_text_seq("REFRESH"): 5383 self._retreat(self._index - 1) 5384 return None 5385 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5386 5387 def _parse_compress(self) -> exp.CompressColumnConstraint: 5388 if self._match(TokenType.L_PAREN, advance=False): 5389 return self.expression( 5390 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5391 ) 5392 5393 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5394 5395 def _parse_generated_as_identity( 5396 self, 5397 ) -> ( 5398 exp.GeneratedAsIdentityColumnConstraint 5399 | exp.ComputedColumnConstraint 5400 | exp.GeneratedAsRowColumnConstraint 5401 ): 5402 if self._match_text_seq("BY", "DEFAULT"): 5403 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5404 this = self.expression( 5405 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5406 ) 5407 else: 5408 self._match_text_seq("ALWAYS") 5409 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5410 5411 self._match(TokenType.ALIAS) 5412 5413 if self._match_text_seq("ROW"): 5414 start = self._match_text_seq("START") 5415 if not start: 5416 self._match(TokenType.END) 5417 hidden = self._match_text_seq("HIDDEN") 5418 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5419 5420 identity = self._match_text_seq("IDENTITY") 5421 5422 if self._match(TokenType.L_PAREN): 5423 if self._match(TokenType.START_WITH): 5424 this.set("start", self._parse_bitwise()) 5425 if self._match_text_seq("INCREMENT", "BY"): 5426 this.set("increment", self._parse_bitwise()) 5427 if self._match_text_seq("MINVALUE"): 5428 this.set("minvalue", self._parse_bitwise()) 5429 if self._match_text_seq("MAXVALUE"): 5430 this.set("maxvalue", self._parse_bitwise()) 5431 5432 if self._match_text_seq("CYCLE"): 5433 this.set("cycle", True) 5434 elif self._match_text_seq("NO", "CYCLE"): 5435 this.set("cycle", False) 5436 5437 if not identity: 5438 this.set("expression", self._parse_range()) 5439 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5440 args = self._parse_csv(self._parse_bitwise) 5441 this.set("start", seq_get(args, 0)) 5442 this.set("increment", seq_get(args, 1)) 5443 5444 self._match_r_paren() 5445 5446 return this 5447 5448 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5449 self._match_text_seq("LENGTH") 5450 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5451 5452 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5453 if self._match_text_seq("NULL"): 5454 return self.expression(exp.NotNullColumnConstraint) 5455 if self._match_text_seq("CASESPECIFIC"): 5456 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5457 if self._match_text_seq("FOR", "REPLICATION"): 5458 return self.expression(exp.NotForReplicationColumnConstraint) 5459 return None 5460 5461 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5462 if self._match(TokenType.CONSTRAINT): 5463 this = self._parse_id_var() 5464 else: 5465 this = None 5466 5467 if self._match_texts(self.CONSTRAINT_PARSERS): 5468 return self.expression( 5469 exp.ColumnConstraint, 5470 this=this, 5471 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5472 ) 5473 5474 return this 5475 5476 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5477 if not self._match(TokenType.CONSTRAINT): 5478 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5479 5480 return self.expression( 5481 exp.Constraint, 5482 this=self._parse_id_var(), 5483 expressions=self._parse_unnamed_constraints(), 5484 ) 5485 5486 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5487 constraints = [] 5488 while True: 5489 constraint = self._parse_unnamed_constraint() or self._parse_function() 5490 if not constraint: 5491 break 5492 constraints.append(constraint) 5493 5494 return constraints 5495 5496 def _parse_unnamed_constraint( 5497 self, constraints: t.Optional[t.Collection[str]] = None 5498 ) -> t.Optional[exp.Expression]: 5499 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5500 constraints or self.CONSTRAINT_PARSERS 5501 ): 5502 return None 5503 5504 constraint = self._prev.text.upper() 5505 if constraint not in self.CONSTRAINT_PARSERS: 5506 self.raise_error(f"No parser found for schema constraint {constraint}.") 5507 5508 return self.CONSTRAINT_PARSERS[constraint](self) 5509 5510 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5511 return self._parse_id_var(any_token=False) 5512 5513 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5514 self._match_text_seq("KEY") 5515 return self.expression( 5516 exp.UniqueColumnConstraint, 5517 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5518 this=self._parse_schema(self._parse_unique_key()), 5519 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5520 on_conflict=self._parse_on_conflict(), 5521 ) 5522 5523 def _parse_key_constraint_options(self) -> t.List[str]: 5524 options = [] 5525 while True: 5526 if not self._curr: 5527 break 5528 5529 if self._match(TokenType.ON): 5530 action = None 5531 on = self._advance_any() and self._prev.text 5532 5533 if self._match_text_seq("NO", "ACTION"): 5534 action = "NO ACTION" 5535 elif self._match_text_seq("CASCADE"): 5536 action = "CASCADE" 5537 elif self._match_text_seq("RESTRICT"): 5538 action = "RESTRICT" 5539 elif self._match_pair(TokenType.SET, TokenType.NULL): 5540 action = "SET NULL" 5541 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5542 action = "SET DEFAULT" 5543 else: 5544 self.raise_error("Invalid key constraint") 5545 5546 options.append(f"ON {on} {action}") 5547 else: 5548 var = self._parse_var_from_options( 5549 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5550 ) 5551 if not var: 5552 break 5553 options.append(var.name) 5554 5555 return options 5556 5557 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5558 if match and not self._match(TokenType.REFERENCES): 5559 return None 5560 5561 expressions = None 5562 this = self._parse_table(schema=True) 5563 options = self._parse_key_constraint_options() 5564 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5565 5566 def _parse_foreign_key(self) -> exp.ForeignKey: 5567 expressions = self._parse_wrapped_id_vars() 5568 reference = self._parse_references() 5569 options = {} 5570 5571 while self._match(TokenType.ON): 5572 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5573 self.raise_error("Expected DELETE or UPDATE") 5574 5575 kind = self._prev.text.lower() 5576 5577 if self._match_text_seq("NO", "ACTION"): 5578 action = "NO ACTION" 5579 elif self._match(TokenType.SET): 5580 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5581 action = "SET " + self._prev.text.upper() 5582 else: 5583 self._advance() 5584 action = self._prev.text.upper() 5585 5586 options[kind] = action 5587 5588 return self.expression( 5589 exp.ForeignKey, 5590 expressions=expressions, 5591 reference=reference, 5592 **options, # type: ignore 5593 ) 5594 5595 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5596 return self._parse_field() 5597 5598 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5599 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5600 self._retreat(self._index - 1) 5601 return None 5602 5603 id_vars = self._parse_wrapped_id_vars() 5604 return self.expression( 5605 exp.PeriodForSystemTimeConstraint, 5606 this=seq_get(id_vars, 0), 5607 expression=seq_get(id_vars, 1), 5608 ) 5609 5610 def _parse_primary_key( 5611 self, wrapped_optional: bool = False, in_props: bool = False 5612 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5613 desc = ( 5614 self._match_set((TokenType.ASC, TokenType.DESC)) 5615 and self._prev.token_type == TokenType.DESC 5616 ) 5617 5618 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5619 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5620 5621 expressions = self._parse_wrapped_csv( 5622 self._parse_primary_key_part, optional=wrapped_optional 5623 ) 5624 options = self._parse_key_constraint_options() 5625 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5626 5627 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5628 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5629 5630 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5631 """ 5632 Parses a datetime column in ODBC format. We parse the column into the corresponding 5633 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5634 same as we did for `DATE('yyyy-mm-dd')`. 5635 5636 Reference: 5637 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5638 """ 5639 self._match(TokenType.VAR) 5640 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5641 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5642 if not self._match(TokenType.R_BRACE): 5643 self.raise_error("Expected }") 5644 return expression 5645 5646 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5647 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5648 return this 5649 5650 bracket_kind = self._prev.token_type 5651 if ( 5652 bracket_kind == TokenType.L_BRACE 5653 and self._curr 5654 and self._curr.token_type == TokenType.VAR 5655 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5656 ): 5657 return self._parse_odbc_datetime_literal() 5658 5659 expressions = self._parse_csv( 5660 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5661 ) 5662 5663 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5664 self.raise_error("Expected ]") 5665 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5666 self.raise_error("Expected }") 5667 5668 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5669 if bracket_kind == TokenType.L_BRACE: 5670 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5671 elif not this: 5672 this = build_array_constructor( 5673 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5674 ) 5675 else: 5676 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5677 if constructor_type: 5678 return build_array_constructor( 5679 constructor_type, 5680 args=expressions, 5681 bracket_kind=bracket_kind, 5682 dialect=self.dialect, 5683 ) 5684 5685 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5686 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5687 5688 self._add_comments(this) 5689 return self._parse_bracket(this) 5690 5691 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5692 if self._match(TokenType.COLON): 5693 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5694 return this 5695 5696 def _parse_case(self) -> t.Optional[exp.Expression]: 5697 ifs = [] 5698 default = None 5699 5700 comments = self._prev_comments 5701 expression = self._parse_assignment() 5702 5703 while self._match(TokenType.WHEN): 5704 this = self._parse_assignment() 5705 self._match(TokenType.THEN) 5706 then = self._parse_assignment() 5707 ifs.append(self.expression(exp.If, this=this, true=then)) 5708 5709 if self._match(TokenType.ELSE): 5710 default = self._parse_assignment() 5711 5712 if not self._match(TokenType.END): 5713 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5714 default = exp.column("interval") 5715 else: 5716 self.raise_error("Expected END after CASE", self._prev) 5717 5718 return self.expression( 5719 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5720 ) 5721 5722 def _parse_if(self) -> t.Optional[exp.Expression]: 5723 if self._match(TokenType.L_PAREN): 5724 args = self._parse_csv(self._parse_assignment) 5725 this = self.validate_expression(exp.If.from_arg_list(args), args) 5726 self._match_r_paren() 5727 else: 5728 index = self._index - 1 5729 5730 if self.NO_PAREN_IF_COMMANDS and index == 0: 5731 return self._parse_as_command(self._prev) 5732 5733 condition = self._parse_assignment() 5734 5735 if not condition: 5736 self._retreat(index) 5737 return None 5738 5739 self._match(TokenType.THEN) 5740 true = self._parse_assignment() 5741 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5742 self._match(TokenType.END) 5743 this = self.expression(exp.If, this=condition, true=true, false=false) 5744 5745 return this 5746 5747 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5748 if not self._match_text_seq("VALUE", "FOR"): 5749 self._retreat(self._index - 1) 5750 return None 5751 5752 return self.expression( 5753 exp.NextValueFor, 5754 this=self._parse_column(), 5755 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5756 ) 5757 5758 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5759 this = self._parse_function() or self._parse_var_or_string(upper=True) 5760 5761 if self._match(TokenType.FROM): 5762 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5763 5764 if not self._match(TokenType.COMMA): 5765 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5766 5767 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5768 5769 def _parse_gap_fill(self) -> exp.GapFill: 5770 self._match(TokenType.TABLE) 5771 this = self._parse_table() 5772 5773 self._match(TokenType.COMMA) 5774 args = [this, *self._parse_csv(self._parse_lambda)] 5775 5776 gap_fill = exp.GapFill.from_arg_list(args) 5777 return self.validate_expression(gap_fill, args) 5778 5779 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5780 this = self._parse_assignment() 5781 5782 if not self._match(TokenType.ALIAS): 5783 if self._match(TokenType.COMMA): 5784 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5785 5786 self.raise_error("Expected AS after CAST") 5787 5788 fmt = None 5789 to = self._parse_types() 5790 5791 if self._match(TokenType.FORMAT): 5792 fmt_string = self._parse_string() 5793 fmt = self._parse_at_time_zone(fmt_string) 5794 5795 if not to: 5796 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5797 if to.this in exp.DataType.TEMPORAL_TYPES: 5798 this = self.expression( 5799 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5800 this=this, 5801 format=exp.Literal.string( 5802 format_time( 5803 fmt_string.this if fmt_string else "", 5804 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5805 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5806 ) 5807 ), 5808 safe=safe, 5809 ) 5810 5811 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5812 this.set("zone", fmt.args["zone"]) 5813 return this 5814 elif not to: 5815 self.raise_error("Expected TYPE after CAST") 5816 elif isinstance(to, exp.Identifier): 5817 to = exp.DataType.build(to.name, udt=True) 5818 elif to.this == exp.DataType.Type.CHAR: 5819 if self._match(TokenType.CHARACTER_SET): 5820 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5821 5822 return self.expression( 5823 exp.Cast if strict else exp.TryCast, 5824 this=this, 5825 to=to, 5826 format=fmt, 5827 safe=safe, 5828 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5829 ) 5830 5831 def _parse_string_agg(self) -> exp.Expression: 5832 if self._match(TokenType.DISTINCT): 5833 args: t.List[t.Optional[exp.Expression]] = [ 5834 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5835 ] 5836 if self._match(TokenType.COMMA): 5837 args.extend(self._parse_csv(self._parse_assignment)) 5838 else: 5839 args = self._parse_csv(self._parse_assignment) # type: ignore 5840 5841 index = self._index 5842 if not self._match(TokenType.R_PAREN) and args: 5843 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5844 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5845 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5846 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5847 5848 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5849 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5850 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5851 if not self._match_text_seq("WITHIN", "GROUP"): 5852 self._retreat(index) 5853 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5854 5855 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5856 order = self._parse_order(this=seq_get(args, 0)) 5857 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5858 5859 def _parse_convert( 5860 self, strict: bool, safe: t.Optional[bool] = None 5861 ) -> t.Optional[exp.Expression]: 5862 this = self._parse_bitwise() 5863 5864 if self._match(TokenType.USING): 5865 to: t.Optional[exp.Expression] = self.expression( 5866 exp.CharacterSet, this=self._parse_var() 5867 ) 5868 elif self._match(TokenType.COMMA): 5869 to = self._parse_types() 5870 else: 5871 to = None 5872 5873 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5874 5875 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5876 """ 5877 There are generally two variants of the DECODE function: 5878 5879 - DECODE(bin, charset) 5880 - DECODE(expression, search, result [, search, result] ... [, default]) 5881 5882 The second variant will always be parsed into a CASE expression. Note that NULL 5883 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5884 instead of relying on pattern matching. 5885 """ 5886 args = self._parse_csv(self._parse_assignment) 5887 5888 if len(args) < 3: 5889 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5890 5891 expression, *expressions = args 5892 if not expression: 5893 return None 5894 5895 ifs = [] 5896 for search, result in zip(expressions[::2], expressions[1::2]): 5897 if not search or not result: 5898 return None 5899 5900 if isinstance(search, exp.Literal): 5901 ifs.append( 5902 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5903 ) 5904 elif isinstance(search, exp.Null): 5905 ifs.append( 5906 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5907 ) 5908 else: 5909 cond = exp.or_( 5910 exp.EQ(this=expression.copy(), expression=search), 5911 exp.and_( 5912 exp.Is(this=expression.copy(), expression=exp.Null()), 5913 exp.Is(this=search.copy(), expression=exp.Null()), 5914 copy=False, 5915 ), 5916 copy=False, 5917 ) 5918 ifs.append(exp.If(this=cond, true=result)) 5919 5920 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5921 5922 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5923 self._match_text_seq("KEY") 5924 key = self._parse_column() 5925 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5926 self._match_text_seq("VALUE") 5927 value = self._parse_bitwise() 5928 5929 if not key and not value: 5930 return None 5931 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5932 5933 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5934 if not this or not self._match_text_seq("FORMAT", "JSON"): 5935 return this 5936 5937 return self.expression(exp.FormatJson, this=this) 5938 5939 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 5940 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 5941 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 5942 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 5943 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 5944 else: 5945 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 5946 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 5947 5948 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 5949 5950 if not empty and not error and not null: 5951 return None 5952 5953 return self.expression( 5954 exp.OnCondition, 5955 empty=empty, 5956 error=error, 5957 null=null, 5958 ) 5959 5960 def _parse_on_handling( 5961 self, on: str, *values: str 5962 ) -> t.Optional[str] | t.Optional[exp.Expression]: 5963 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 5964 for value in values: 5965 if self._match_text_seq(value, "ON", on): 5966 return f"{value} ON {on}" 5967 5968 index = self._index 5969 if self._match(TokenType.DEFAULT): 5970 default_value = self._parse_bitwise() 5971 if self._match_text_seq("ON", on): 5972 return default_value 5973 5974 self._retreat(index) 5975 5976 return None 5977 5978 @t.overload 5979 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5980 5981 @t.overload 5982 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5983 5984 def _parse_json_object(self, agg=False): 5985 star = self._parse_star() 5986 expressions = ( 5987 [star] 5988 if star 5989 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5990 ) 5991 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5992 5993 unique_keys = None 5994 if self._match_text_seq("WITH", "UNIQUE"): 5995 unique_keys = True 5996 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5997 unique_keys = False 5998 5999 self._match_text_seq("KEYS") 6000 6001 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6002 self._parse_type() 6003 ) 6004 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6005 6006 return self.expression( 6007 exp.JSONObjectAgg if agg else exp.JSONObject, 6008 expressions=expressions, 6009 null_handling=null_handling, 6010 unique_keys=unique_keys, 6011 return_type=return_type, 6012 encoding=encoding, 6013 ) 6014 6015 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6016 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6017 if not self._match_text_seq("NESTED"): 6018 this = self._parse_id_var() 6019 kind = self._parse_types(allow_identifiers=False) 6020 nested = None 6021 else: 6022 this = None 6023 kind = None 6024 nested = True 6025 6026 path = self._match_text_seq("PATH") and self._parse_string() 6027 nested_schema = nested and self._parse_json_schema() 6028 6029 return self.expression( 6030 exp.JSONColumnDef, 6031 this=this, 6032 kind=kind, 6033 path=path, 6034 nested_schema=nested_schema, 6035 ) 6036 6037 def _parse_json_schema(self) -> exp.JSONSchema: 6038 self._match_text_seq("COLUMNS") 6039 return self.expression( 6040 exp.JSONSchema, 6041 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6042 ) 6043 6044 def _parse_json_table(self) -> exp.JSONTable: 6045 this = self._parse_format_json(self._parse_bitwise()) 6046 path = self._match(TokenType.COMMA) and self._parse_string() 6047 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6048 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6049 schema = self._parse_json_schema() 6050 6051 return exp.JSONTable( 6052 this=this, 6053 schema=schema, 6054 path=path, 6055 error_handling=error_handling, 6056 empty_handling=empty_handling, 6057 ) 6058 6059 def _parse_match_against(self) -> exp.MatchAgainst: 6060 expressions = self._parse_csv(self._parse_column) 6061 6062 self._match_text_seq(")", "AGAINST", "(") 6063 6064 this = self._parse_string() 6065 6066 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6067 modifier = "IN NATURAL LANGUAGE MODE" 6068 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6069 modifier = f"{modifier} WITH QUERY EXPANSION" 6070 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6071 modifier = "IN BOOLEAN MODE" 6072 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6073 modifier = "WITH QUERY EXPANSION" 6074 else: 6075 modifier = None 6076 6077 return self.expression( 6078 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6079 ) 6080 6081 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6082 def _parse_open_json(self) -> exp.OpenJSON: 6083 this = self._parse_bitwise() 6084 path = self._match(TokenType.COMMA) and self._parse_string() 6085 6086 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6087 this = self._parse_field(any_token=True) 6088 kind = self._parse_types() 6089 path = self._parse_string() 6090 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6091 6092 return self.expression( 6093 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6094 ) 6095 6096 expressions = None 6097 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6098 self._match_l_paren() 6099 expressions = self._parse_csv(_parse_open_json_column_def) 6100 6101 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6102 6103 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6104 args = self._parse_csv(self._parse_bitwise) 6105 6106 if self._match(TokenType.IN): 6107 return self.expression( 6108 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6109 ) 6110 6111 if haystack_first: 6112 haystack = seq_get(args, 0) 6113 needle = seq_get(args, 1) 6114 else: 6115 needle = seq_get(args, 0) 6116 haystack = seq_get(args, 1) 6117 6118 return self.expression( 6119 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6120 ) 6121 6122 def _parse_predict(self) -> exp.Predict: 6123 self._match_text_seq("MODEL") 6124 this = self._parse_table() 6125 6126 self._match(TokenType.COMMA) 6127 self._match_text_seq("TABLE") 6128 6129 return self.expression( 6130 exp.Predict, 6131 this=this, 6132 expression=self._parse_table(), 6133 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6134 ) 6135 6136 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6137 args = self._parse_csv(self._parse_table) 6138 return exp.JoinHint(this=func_name.upper(), expressions=args) 6139 6140 def _parse_substring(self) -> exp.Substring: 6141 # Postgres supports the form: substring(string [from int] [for int]) 6142 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6143 6144 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6145 6146 if self._match(TokenType.FROM): 6147 args.append(self._parse_bitwise()) 6148 if self._match(TokenType.FOR): 6149 if len(args) == 1: 6150 args.append(exp.Literal.number(1)) 6151 args.append(self._parse_bitwise()) 6152 6153 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6154 6155 def _parse_trim(self) -> exp.Trim: 6156 # https://www.w3resource.com/sql/character-functions/trim.php 6157 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6158 6159 position = None 6160 collation = None 6161 expression = None 6162 6163 if self._match_texts(self.TRIM_TYPES): 6164 position = self._prev.text.upper() 6165 6166 this = self._parse_bitwise() 6167 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6168 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6169 expression = self._parse_bitwise() 6170 6171 if invert_order: 6172 this, expression = expression, this 6173 6174 if self._match(TokenType.COLLATE): 6175 collation = self._parse_bitwise() 6176 6177 return self.expression( 6178 exp.Trim, this=this, position=position, expression=expression, collation=collation 6179 ) 6180 6181 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6182 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6183 6184 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6185 return self._parse_window(self._parse_id_var(), alias=True) 6186 6187 def _parse_respect_or_ignore_nulls( 6188 self, this: t.Optional[exp.Expression] 6189 ) -> t.Optional[exp.Expression]: 6190 if self._match_text_seq("IGNORE", "NULLS"): 6191 return self.expression(exp.IgnoreNulls, this=this) 6192 if self._match_text_seq("RESPECT", "NULLS"): 6193 return self.expression(exp.RespectNulls, this=this) 6194 return this 6195 6196 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6197 if self._match(TokenType.HAVING): 6198 self._match_texts(("MAX", "MIN")) 6199 max = self._prev.text.upper() != "MIN" 6200 return self.expression( 6201 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6202 ) 6203 6204 return this 6205 6206 def _parse_window( 6207 self, this: t.Optional[exp.Expression], alias: bool = False 6208 ) -> t.Optional[exp.Expression]: 6209 func = this 6210 comments = func.comments if isinstance(func, exp.Expression) else None 6211 6212 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6213 self._match(TokenType.WHERE) 6214 this = self.expression( 6215 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6216 ) 6217 self._match_r_paren() 6218 6219 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6220 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6221 if self._match_text_seq("WITHIN", "GROUP"): 6222 order = self._parse_wrapped(self._parse_order) 6223 this = self.expression(exp.WithinGroup, this=this, expression=order) 6224 6225 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6226 # Some dialects choose to implement and some do not. 6227 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6228 6229 # There is some code above in _parse_lambda that handles 6230 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6231 6232 # The below changes handle 6233 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6234 6235 # Oracle allows both formats 6236 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6237 # and Snowflake chose to do the same for familiarity 6238 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6239 if isinstance(this, exp.AggFunc): 6240 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6241 6242 if ignore_respect and ignore_respect is not this: 6243 ignore_respect.replace(ignore_respect.this) 6244 this = self.expression(ignore_respect.__class__, this=this) 6245 6246 this = self._parse_respect_or_ignore_nulls(this) 6247 6248 # bigquery select from window x AS (partition by ...) 6249 if alias: 6250 over = None 6251 self._match(TokenType.ALIAS) 6252 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6253 return this 6254 else: 6255 over = self._prev.text.upper() 6256 6257 if comments and isinstance(func, exp.Expression): 6258 func.pop_comments() 6259 6260 if not self._match(TokenType.L_PAREN): 6261 return self.expression( 6262 exp.Window, 6263 comments=comments, 6264 this=this, 6265 alias=self._parse_id_var(False), 6266 over=over, 6267 ) 6268 6269 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6270 6271 first = self._match(TokenType.FIRST) 6272 if self._match_text_seq("LAST"): 6273 first = False 6274 6275 partition, order = self._parse_partition_and_order() 6276 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6277 6278 if kind: 6279 self._match(TokenType.BETWEEN) 6280 start = self._parse_window_spec() 6281 self._match(TokenType.AND) 6282 end = self._parse_window_spec() 6283 6284 spec = self.expression( 6285 exp.WindowSpec, 6286 kind=kind, 6287 start=start["value"], 6288 start_side=start["side"], 6289 end=end["value"], 6290 end_side=end["side"], 6291 ) 6292 else: 6293 spec = None 6294 6295 self._match_r_paren() 6296 6297 window = self.expression( 6298 exp.Window, 6299 comments=comments, 6300 this=this, 6301 partition_by=partition, 6302 order=order, 6303 spec=spec, 6304 alias=window_alias, 6305 over=over, 6306 first=first, 6307 ) 6308 6309 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6310 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6311 return self._parse_window(window, alias=alias) 6312 6313 return window 6314 6315 def _parse_partition_and_order( 6316 self, 6317 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6318 return self._parse_partition_by(), self._parse_order() 6319 6320 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6321 self._match(TokenType.BETWEEN) 6322 6323 return { 6324 "value": ( 6325 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6326 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6327 or self._parse_bitwise() 6328 ), 6329 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6330 } 6331 6332 def _parse_alias( 6333 self, this: t.Optional[exp.Expression], explicit: bool = False 6334 ) -> t.Optional[exp.Expression]: 6335 any_token = self._match(TokenType.ALIAS) 6336 comments = self._prev_comments or [] 6337 6338 if explicit and not any_token: 6339 return this 6340 6341 if self._match(TokenType.L_PAREN): 6342 aliases = self.expression( 6343 exp.Aliases, 6344 comments=comments, 6345 this=this, 6346 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6347 ) 6348 self._match_r_paren(aliases) 6349 return aliases 6350 6351 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6352 self.STRING_ALIASES and self._parse_string_as_identifier() 6353 ) 6354 6355 if alias: 6356 comments.extend(alias.pop_comments()) 6357 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6358 column = this.this 6359 6360 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6361 if not this.comments and column and column.comments: 6362 this.comments = column.pop_comments() 6363 6364 return this 6365 6366 def _parse_id_var( 6367 self, 6368 any_token: bool = True, 6369 tokens: t.Optional[t.Collection[TokenType]] = None, 6370 ) -> t.Optional[exp.Expression]: 6371 expression = self._parse_identifier() 6372 if not expression and ( 6373 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6374 ): 6375 quoted = self._prev.token_type == TokenType.STRING 6376 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6377 6378 return expression 6379 6380 def _parse_string(self) -> t.Optional[exp.Expression]: 6381 if self._match_set(self.STRING_PARSERS): 6382 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6383 return self._parse_placeholder() 6384 6385 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6386 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6387 6388 def _parse_number(self) -> t.Optional[exp.Expression]: 6389 if self._match_set(self.NUMERIC_PARSERS): 6390 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6391 return self._parse_placeholder() 6392 6393 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6394 if self._match(TokenType.IDENTIFIER): 6395 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6396 return self._parse_placeholder() 6397 6398 def _parse_var( 6399 self, 6400 any_token: bool = False, 6401 tokens: t.Optional[t.Collection[TokenType]] = None, 6402 upper: bool = False, 6403 ) -> t.Optional[exp.Expression]: 6404 if ( 6405 (any_token and self._advance_any()) 6406 or self._match(TokenType.VAR) 6407 or (self._match_set(tokens) if tokens else False) 6408 ): 6409 return self.expression( 6410 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6411 ) 6412 return self._parse_placeholder() 6413 6414 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6415 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6416 self._advance() 6417 return self._prev 6418 return None 6419 6420 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6421 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6422 6423 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6424 return self._parse_primary() or self._parse_var(any_token=True) 6425 6426 def _parse_null(self) -> t.Optional[exp.Expression]: 6427 if self._match_set(self.NULL_TOKENS): 6428 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6429 return self._parse_placeholder() 6430 6431 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6432 if self._match(TokenType.TRUE): 6433 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6434 if self._match(TokenType.FALSE): 6435 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6436 return self._parse_placeholder() 6437 6438 def _parse_star(self) -> t.Optional[exp.Expression]: 6439 if self._match(TokenType.STAR): 6440 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6441 return self._parse_placeholder() 6442 6443 def _parse_parameter(self) -> exp.Parameter: 6444 this = self._parse_identifier() or self._parse_primary_or_var() 6445 return self.expression(exp.Parameter, this=this) 6446 6447 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6448 if self._match_set(self.PLACEHOLDER_PARSERS): 6449 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6450 if placeholder: 6451 return placeholder 6452 self._advance(-1) 6453 return None 6454 6455 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6456 if not self._match_texts(keywords): 6457 return None 6458 if self._match(TokenType.L_PAREN, advance=False): 6459 return self._parse_wrapped_csv(self._parse_expression) 6460 6461 expression = self._parse_expression() 6462 return [expression] if expression else None 6463 6464 def _parse_csv( 6465 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6466 ) -> t.List[exp.Expression]: 6467 parse_result = parse_method() 6468 items = [parse_result] if parse_result is not None else [] 6469 6470 while self._match(sep): 6471 self._add_comments(parse_result) 6472 parse_result = parse_method() 6473 if parse_result is not None: 6474 items.append(parse_result) 6475 6476 return items 6477 6478 def _parse_tokens( 6479 self, parse_method: t.Callable, expressions: t.Dict 6480 ) -> t.Optional[exp.Expression]: 6481 this = parse_method() 6482 6483 while self._match_set(expressions): 6484 this = self.expression( 6485 expressions[self._prev.token_type], 6486 this=this, 6487 comments=self._prev_comments, 6488 expression=parse_method(), 6489 ) 6490 6491 return this 6492 6493 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6494 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6495 6496 def _parse_wrapped_csv( 6497 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6498 ) -> t.List[exp.Expression]: 6499 return self._parse_wrapped( 6500 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6501 ) 6502 6503 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6504 wrapped = self._match(TokenType.L_PAREN) 6505 if not wrapped and not optional: 6506 self.raise_error("Expecting (") 6507 parse_result = parse_method() 6508 if wrapped: 6509 self._match_r_paren() 6510 return parse_result 6511 6512 def _parse_expressions(self) -> t.List[exp.Expression]: 6513 return self._parse_csv(self._parse_expression) 6514 6515 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6516 return self._parse_select() or self._parse_set_operations( 6517 self._parse_expression() if alias else self._parse_assignment() 6518 ) 6519 6520 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6521 return self._parse_query_modifiers( 6522 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6523 ) 6524 6525 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6526 this = None 6527 if self._match_texts(self.TRANSACTION_KIND): 6528 this = self._prev.text 6529 6530 self._match_texts(("TRANSACTION", "WORK")) 6531 6532 modes = [] 6533 while True: 6534 mode = [] 6535 while self._match(TokenType.VAR): 6536 mode.append(self._prev.text) 6537 6538 if mode: 6539 modes.append(" ".join(mode)) 6540 if not self._match(TokenType.COMMA): 6541 break 6542 6543 return self.expression(exp.Transaction, this=this, modes=modes) 6544 6545 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6546 chain = None 6547 savepoint = None 6548 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6549 6550 self._match_texts(("TRANSACTION", "WORK")) 6551 6552 if self._match_text_seq("TO"): 6553 self._match_text_seq("SAVEPOINT") 6554 savepoint = self._parse_id_var() 6555 6556 if self._match(TokenType.AND): 6557 chain = not self._match_text_seq("NO") 6558 self._match_text_seq("CHAIN") 6559 6560 if is_rollback: 6561 return self.expression(exp.Rollback, savepoint=savepoint) 6562 6563 return self.expression(exp.Commit, chain=chain) 6564 6565 def _parse_refresh(self) -> exp.Refresh: 6566 self._match(TokenType.TABLE) 6567 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6568 6569 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6570 if not self._match_text_seq("ADD"): 6571 return None 6572 6573 self._match(TokenType.COLUMN) 6574 exists_column = self._parse_exists(not_=True) 6575 expression = self._parse_field_def() 6576 6577 if expression: 6578 expression.set("exists", exists_column) 6579 6580 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6581 if self._match_texts(("FIRST", "AFTER")): 6582 position = self._prev.text 6583 column_position = self.expression( 6584 exp.ColumnPosition, this=self._parse_column(), position=position 6585 ) 6586 expression.set("position", column_position) 6587 6588 return expression 6589 6590 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6591 drop = self._match(TokenType.DROP) and self._parse_drop() 6592 if drop and not isinstance(drop, exp.Command): 6593 drop.set("kind", drop.args.get("kind", "COLUMN")) 6594 return drop 6595 6596 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6597 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6598 return self.expression( 6599 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6600 ) 6601 6602 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6603 index = self._index - 1 6604 6605 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6606 return self._parse_csv( 6607 lambda: self.expression( 6608 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6609 ) 6610 ) 6611 6612 self._retreat(index) 6613 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6614 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6615 6616 if self._match_text_seq("ADD", "COLUMNS"): 6617 schema = self._parse_schema() 6618 if schema: 6619 return [schema] 6620 return [] 6621 6622 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6623 6624 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6625 if self._match_texts(self.ALTER_ALTER_PARSERS): 6626 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6627 6628 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6629 # keyword after ALTER we default to parsing this statement 6630 self._match(TokenType.COLUMN) 6631 column = self._parse_field(any_token=True) 6632 6633 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6634 return self.expression(exp.AlterColumn, this=column, drop=True) 6635 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6636 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6637 if self._match(TokenType.COMMENT): 6638 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6639 if self._match_text_seq("DROP", "NOT", "NULL"): 6640 return self.expression( 6641 exp.AlterColumn, 6642 this=column, 6643 drop=True, 6644 allow_null=True, 6645 ) 6646 if self._match_text_seq("SET", "NOT", "NULL"): 6647 return self.expression( 6648 exp.AlterColumn, 6649 this=column, 6650 allow_null=False, 6651 ) 6652 self._match_text_seq("SET", "DATA") 6653 self._match_text_seq("TYPE") 6654 return self.expression( 6655 exp.AlterColumn, 6656 this=column, 6657 dtype=self._parse_types(), 6658 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6659 using=self._match(TokenType.USING) and self._parse_assignment(), 6660 ) 6661 6662 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6663 if self._match_texts(("ALL", "EVEN", "AUTO")): 6664 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6665 6666 self._match_text_seq("KEY", "DISTKEY") 6667 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6668 6669 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6670 if compound: 6671 self._match_text_seq("SORTKEY") 6672 6673 if self._match(TokenType.L_PAREN, advance=False): 6674 return self.expression( 6675 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6676 ) 6677 6678 self._match_texts(("AUTO", "NONE")) 6679 return self.expression( 6680 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6681 ) 6682 6683 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6684 index = self._index - 1 6685 6686 partition_exists = self._parse_exists() 6687 if self._match(TokenType.PARTITION, advance=False): 6688 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6689 6690 self._retreat(index) 6691 return self._parse_csv(self._parse_drop_column) 6692 6693 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6694 if self._match(TokenType.COLUMN): 6695 exists = self._parse_exists() 6696 old_column = self._parse_column() 6697 to = self._match_text_seq("TO") 6698 new_column = self._parse_column() 6699 6700 if old_column is None or to is None or new_column is None: 6701 return None 6702 6703 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6704 6705 self._match_text_seq("TO") 6706 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6707 6708 def _parse_alter_table_set(self) -> exp.AlterSet: 6709 alter_set = self.expression(exp.AlterSet) 6710 6711 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6712 "TABLE", "PROPERTIES" 6713 ): 6714 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6715 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6716 alter_set.set("expressions", [self._parse_assignment()]) 6717 elif self._match_texts(("LOGGED", "UNLOGGED")): 6718 alter_set.set("option", exp.var(self._prev.text.upper())) 6719 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6720 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6721 elif self._match_text_seq("LOCATION"): 6722 alter_set.set("location", self._parse_field()) 6723 elif self._match_text_seq("ACCESS", "METHOD"): 6724 alter_set.set("access_method", self._parse_field()) 6725 elif self._match_text_seq("TABLESPACE"): 6726 alter_set.set("tablespace", self._parse_field()) 6727 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6728 alter_set.set("file_format", [self._parse_field()]) 6729 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6730 alter_set.set("file_format", self._parse_wrapped_options()) 6731 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6732 alter_set.set("copy_options", self._parse_wrapped_options()) 6733 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6734 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6735 else: 6736 if self._match_text_seq("SERDE"): 6737 alter_set.set("serde", self._parse_field()) 6738 6739 alter_set.set("expressions", [self._parse_properties()]) 6740 6741 return alter_set 6742 6743 def _parse_alter(self) -> exp.Alter | exp.Command: 6744 start = self._prev 6745 6746 alter_token = self._match_set(self.ALTERABLES) and self._prev 6747 if not alter_token: 6748 return self._parse_as_command(start) 6749 6750 exists = self._parse_exists() 6751 only = self._match_text_seq("ONLY") 6752 this = self._parse_table(schema=True) 6753 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6754 6755 if self._next: 6756 self._advance() 6757 6758 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6759 if parser: 6760 actions = ensure_list(parser(self)) 6761 options = self._parse_csv(self._parse_property) 6762 6763 if not self._curr and actions: 6764 return self.expression( 6765 exp.Alter, 6766 this=this, 6767 kind=alter_token.text.upper(), 6768 exists=exists, 6769 actions=actions, 6770 only=only, 6771 options=options, 6772 cluster=cluster, 6773 ) 6774 6775 return self._parse_as_command(start) 6776 6777 def _parse_merge(self) -> exp.Merge: 6778 self._match(TokenType.INTO) 6779 target = self._parse_table() 6780 6781 if target and self._match(TokenType.ALIAS, advance=False): 6782 target.set("alias", self._parse_table_alias()) 6783 6784 self._match(TokenType.USING) 6785 using = self._parse_table() 6786 6787 self._match(TokenType.ON) 6788 on = self._parse_assignment() 6789 6790 return self.expression( 6791 exp.Merge, 6792 this=target, 6793 using=using, 6794 on=on, 6795 expressions=self._parse_when_matched(), 6796 ) 6797 6798 def _parse_when_matched(self) -> t.List[exp.When]: 6799 whens = [] 6800 6801 while self._match(TokenType.WHEN): 6802 matched = not self._match(TokenType.NOT) 6803 self._match_text_seq("MATCHED") 6804 source = ( 6805 False 6806 if self._match_text_seq("BY", "TARGET") 6807 else self._match_text_seq("BY", "SOURCE") 6808 ) 6809 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6810 6811 self._match(TokenType.THEN) 6812 6813 if self._match(TokenType.INSERT): 6814 _this = self._parse_star() 6815 if _this: 6816 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6817 else: 6818 then = self.expression( 6819 exp.Insert, 6820 this=self._parse_value(), 6821 expression=self._match_text_seq("VALUES") and self._parse_value(), 6822 ) 6823 elif self._match(TokenType.UPDATE): 6824 expressions = self._parse_star() 6825 if expressions: 6826 then = self.expression(exp.Update, expressions=expressions) 6827 else: 6828 then = self.expression( 6829 exp.Update, 6830 expressions=self._match(TokenType.SET) 6831 and self._parse_csv(self._parse_equality), 6832 ) 6833 elif self._match(TokenType.DELETE): 6834 then = self.expression(exp.Var, this=self._prev.text) 6835 else: 6836 then = None 6837 6838 whens.append( 6839 self.expression( 6840 exp.When, 6841 matched=matched, 6842 source=source, 6843 condition=condition, 6844 then=then, 6845 ) 6846 ) 6847 return whens 6848 6849 def _parse_show(self) -> t.Optional[exp.Expression]: 6850 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6851 if parser: 6852 return parser(self) 6853 return self._parse_as_command(self._prev) 6854 6855 def _parse_set_item_assignment( 6856 self, kind: t.Optional[str] = None 6857 ) -> t.Optional[exp.Expression]: 6858 index = self._index 6859 6860 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6861 return self._parse_set_transaction(global_=kind == "GLOBAL") 6862 6863 left = self._parse_primary() or self._parse_column() 6864 assignment_delimiter = self._match_texts(("=", "TO")) 6865 6866 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6867 self._retreat(index) 6868 return None 6869 6870 right = self._parse_statement() or self._parse_id_var() 6871 if isinstance(right, (exp.Column, exp.Identifier)): 6872 right = exp.var(right.name) 6873 6874 this = self.expression(exp.EQ, this=left, expression=right) 6875 return self.expression(exp.SetItem, this=this, kind=kind) 6876 6877 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6878 self._match_text_seq("TRANSACTION") 6879 characteristics = self._parse_csv( 6880 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6881 ) 6882 return self.expression( 6883 exp.SetItem, 6884 expressions=characteristics, 6885 kind="TRANSACTION", 6886 **{"global": global_}, # type: ignore 6887 ) 6888 6889 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6890 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6891 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6892 6893 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6894 index = self._index 6895 set_ = self.expression( 6896 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6897 ) 6898 6899 if self._curr: 6900 self._retreat(index) 6901 return self._parse_as_command(self._prev) 6902 6903 return set_ 6904 6905 def _parse_var_from_options( 6906 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6907 ) -> t.Optional[exp.Var]: 6908 start = self._curr 6909 if not start: 6910 return None 6911 6912 option = start.text.upper() 6913 continuations = options.get(option) 6914 6915 index = self._index 6916 self._advance() 6917 for keywords in continuations or []: 6918 if isinstance(keywords, str): 6919 keywords = (keywords,) 6920 6921 if self._match_text_seq(*keywords): 6922 option = f"{option} {' '.join(keywords)}" 6923 break 6924 else: 6925 if continuations or continuations is None: 6926 if raise_unmatched: 6927 self.raise_error(f"Unknown option {option}") 6928 6929 self._retreat(index) 6930 return None 6931 6932 return exp.var(option) 6933 6934 def _parse_as_command(self, start: Token) -> exp.Command: 6935 while self._curr: 6936 self._advance() 6937 text = self._find_sql(start, self._prev) 6938 size = len(start.text) 6939 self._warn_unsupported() 6940 return exp.Command(this=text[:size], expression=text[size:]) 6941 6942 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6943 settings = [] 6944 6945 self._match_l_paren() 6946 kind = self._parse_id_var() 6947 6948 if self._match(TokenType.L_PAREN): 6949 while True: 6950 key = self._parse_id_var() 6951 value = self._parse_primary() 6952 6953 if not key and value is None: 6954 break 6955 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6956 self._match(TokenType.R_PAREN) 6957 6958 self._match_r_paren() 6959 6960 return self.expression( 6961 exp.DictProperty, 6962 this=this, 6963 kind=kind.this if kind else None, 6964 settings=settings, 6965 ) 6966 6967 def _parse_dict_range(self, this: str) -> exp.DictRange: 6968 self._match_l_paren() 6969 has_min = self._match_text_seq("MIN") 6970 if has_min: 6971 min = self._parse_var() or self._parse_primary() 6972 self._match_text_seq("MAX") 6973 max = self._parse_var() or self._parse_primary() 6974 else: 6975 max = self._parse_var() or self._parse_primary() 6976 min = exp.Literal.number(0) 6977 self._match_r_paren() 6978 return self.expression(exp.DictRange, this=this, min=min, max=max) 6979 6980 def _parse_comprehension( 6981 self, this: t.Optional[exp.Expression] 6982 ) -> t.Optional[exp.Comprehension]: 6983 index = self._index 6984 expression = self._parse_column() 6985 if not self._match(TokenType.IN): 6986 self._retreat(index - 1) 6987 return None 6988 iterator = self._parse_column() 6989 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6990 return self.expression( 6991 exp.Comprehension, 6992 this=this, 6993 expression=expression, 6994 iterator=iterator, 6995 condition=condition, 6996 ) 6997 6998 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6999 if self._match(TokenType.HEREDOC_STRING): 7000 return self.expression(exp.Heredoc, this=self._prev.text) 7001 7002 if not self._match_text_seq("$"): 7003 return None 7004 7005 tags = ["$"] 7006 tag_text = None 7007 7008 if self._is_connected(): 7009 self._advance() 7010 tags.append(self._prev.text.upper()) 7011 else: 7012 self.raise_error("No closing $ found") 7013 7014 if tags[-1] != "$": 7015 if self._is_connected() and self._match_text_seq("$"): 7016 tag_text = tags[-1] 7017 tags.append("$") 7018 else: 7019 self.raise_error("No closing $ found") 7020 7021 heredoc_start = self._curr 7022 7023 while self._curr: 7024 if self._match_text_seq(*tags, advance=False): 7025 this = self._find_sql(heredoc_start, self._prev) 7026 self._advance(len(tags)) 7027 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7028 7029 self._advance() 7030 7031 self.raise_error(f"No closing {''.join(tags)} found") 7032 return None 7033 7034 def _find_parser( 7035 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7036 ) -> t.Optional[t.Callable]: 7037 if not self._curr: 7038 return None 7039 7040 index = self._index 7041 this = [] 7042 while True: 7043 # The current token might be multiple words 7044 curr = self._curr.text.upper() 7045 key = curr.split(" ") 7046 this.append(curr) 7047 7048 self._advance() 7049 result, trie = in_trie(trie, key) 7050 if result == TrieResult.FAILED: 7051 break 7052 7053 if result == TrieResult.EXISTS: 7054 subparser = parsers[" ".join(this)] 7055 return subparser 7056 7057 self._retreat(index) 7058 return None 7059 7060 def _match(self, token_type, advance=True, expression=None): 7061 if not self._curr: 7062 return None 7063 7064 if self._curr.token_type == token_type: 7065 if advance: 7066 self._advance() 7067 self._add_comments(expression) 7068 return True 7069 7070 return None 7071 7072 def _match_set(self, types, advance=True): 7073 if not self._curr: 7074 return None 7075 7076 if self._curr.token_type in types: 7077 if advance: 7078 self._advance() 7079 return True 7080 7081 return None 7082 7083 def _match_pair(self, token_type_a, token_type_b, advance=True): 7084 if not self._curr or not self._next: 7085 return None 7086 7087 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7088 if advance: 7089 self._advance(2) 7090 return True 7091 7092 return None 7093 7094 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7095 if not self._match(TokenType.L_PAREN, expression=expression): 7096 self.raise_error("Expecting (") 7097 7098 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7099 if not self._match(TokenType.R_PAREN, expression=expression): 7100 self.raise_error("Expecting )") 7101 7102 def _match_texts(self, texts, advance=True): 7103 if ( 7104 self._curr 7105 and self._curr.token_type != TokenType.STRING 7106 and self._curr.text.upper() in texts 7107 ): 7108 if advance: 7109 self._advance() 7110 return True 7111 return None 7112 7113 def _match_text_seq(self, *texts, advance=True): 7114 index = self._index 7115 for text in texts: 7116 if ( 7117 self._curr 7118 and self._curr.token_type != TokenType.STRING 7119 and self._curr.text.upper() == text 7120 ): 7121 self._advance() 7122 else: 7123 self._retreat(index) 7124 return None 7125 7126 if not advance: 7127 self._retreat(index) 7128 7129 return True 7130 7131 def _replace_lambda( 7132 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7133 ) -> t.Optional[exp.Expression]: 7134 if not node: 7135 return node 7136 7137 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7138 7139 for column in node.find_all(exp.Column): 7140 typ = lambda_types.get(column.parts[0].name) 7141 if typ is not None: 7142 dot_or_id = column.to_dot() if column.table else column.this 7143 7144 if typ: 7145 dot_or_id = self.expression( 7146 exp.Cast, 7147 this=dot_or_id, 7148 to=typ, 7149 ) 7150 7151 parent = column.parent 7152 7153 while isinstance(parent, exp.Dot): 7154 if not isinstance(parent.parent, exp.Dot): 7155 parent.replace(dot_or_id) 7156 break 7157 parent = parent.parent 7158 else: 7159 if column is node: 7160 node = dot_or_id 7161 else: 7162 column.replace(dot_or_id) 7163 return node 7164 7165 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7166 start = self._prev 7167 7168 # Not to be confused with TRUNCATE(number, decimals) function call 7169 if self._match(TokenType.L_PAREN): 7170 self._retreat(self._index - 2) 7171 return self._parse_function() 7172 7173 # Clickhouse supports TRUNCATE DATABASE as well 7174 is_database = self._match(TokenType.DATABASE) 7175 7176 self._match(TokenType.TABLE) 7177 7178 exists = self._parse_exists(not_=False) 7179 7180 expressions = self._parse_csv( 7181 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7182 ) 7183 7184 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7185 7186 if self._match_text_seq("RESTART", "IDENTITY"): 7187 identity = "RESTART" 7188 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7189 identity = "CONTINUE" 7190 else: 7191 identity = None 7192 7193 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7194 option = self._prev.text 7195 else: 7196 option = None 7197 7198 partition = self._parse_partition() 7199 7200 # Fallback case 7201 if self._curr: 7202 return self._parse_as_command(start) 7203 7204 return self.expression( 7205 exp.TruncateTable, 7206 expressions=expressions, 7207 is_database=is_database, 7208 exists=exists, 7209 cluster=cluster, 7210 identity=identity, 7211 option=option, 7212 partition=partition, 7213 ) 7214 7215 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7216 this = self._parse_ordered(self._parse_opclass) 7217 7218 if not self._match(TokenType.WITH): 7219 return this 7220 7221 op = self._parse_var(any_token=True) 7222 7223 return self.expression(exp.WithOperator, this=this, op=op) 7224 7225 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7226 self._match(TokenType.EQ) 7227 self._match(TokenType.L_PAREN) 7228 7229 opts: t.List[t.Optional[exp.Expression]] = [] 7230 while self._curr and not self._match(TokenType.R_PAREN): 7231 if self._match_text_seq("FORMAT_NAME", "="): 7232 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7233 # so we parse it separately to use _parse_field() 7234 prop = self.expression( 7235 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7236 ) 7237 opts.append(prop) 7238 else: 7239 opts.append(self._parse_property()) 7240 7241 self._match(TokenType.COMMA) 7242 7243 return opts 7244 7245 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7246 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7247 7248 options = [] 7249 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7250 option = self._parse_var(any_token=True) 7251 prev = self._prev.text.upper() 7252 7253 # Different dialects might separate options and values by white space, "=" and "AS" 7254 self._match(TokenType.EQ) 7255 self._match(TokenType.ALIAS) 7256 7257 param = self.expression(exp.CopyParameter, this=option) 7258 7259 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7260 TokenType.L_PAREN, advance=False 7261 ): 7262 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7263 param.set("expressions", self._parse_wrapped_options()) 7264 elif prev == "FILE_FORMAT": 7265 # T-SQL's external file format case 7266 param.set("expression", self._parse_field()) 7267 else: 7268 param.set("expression", self._parse_unquoted_field()) 7269 7270 options.append(param) 7271 self._match(sep) 7272 7273 return options 7274 7275 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7276 expr = self.expression(exp.Credentials) 7277 7278 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7279 expr.set("storage", self._parse_field()) 7280 if self._match_text_seq("CREDENTIALS"): 7281 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7282 creds = ( 7283 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7284 ) 7285 expr.set("credentials", creds) 7286 if self._match_text_seq("ENCRYPTION"): 7287 expr.set("encryption", self._parse_wrapped_options()) 7288 if self._match_text_seq("IAM_ROLE"): 7289 expr.set("iam_role", self._parse_field()) 7290 if self._match_text_seq("REGION"): 7291 expr.set("region", self._parse_field()) 7292 7293 return expr 7294 7295 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7296 return self._parse_field() 7297 7298 def _parse_copy(self) -> exp.Copy | exp.Command: 7299 start = self._prev 7300 7301 self._match(TokenType.INTO) 7302 7303 this = ( 7304 self._parse_select(nested=True, parse_subquery_alias=False) 7305 if self._match(TokenType.L_PAREN, advance=False) 7306 else self._parse_table(schema=True) 7307 ) 7308 7309 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7310 7311 files = self._parse_csv(self._parse_file_location) 7312 credentials = self._parse_credentials() 7313 7314 self._match_text_seq("WITH") 7315 7316 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7317 7318 # Fallback case 7319 if self._curr: 7320 return self._parse_as_command(start) 7321 7322 return self.expression( 7323 exp.Copy, 7324 this=this, 7325 kind=kind, 7326 credentials=credentials, 7327 files=files, 7328 params=params, 7329 ) 7330 7331 def _parse_normalize(self) -> exp.Normalize: 7332 return self.expression( 7333 exp.Normalize, 7334 this=self._parse_bitwise(), 7335 form=self._match(TokenType.COMMA) and self._parse_var(), 7336 )
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
44def binary_range_parser( 45 expr_type: t.Type[exp.Expression], reverse_args: bool = False 46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 47 def _parse_binary_range( 48 self: Parser, this: t.Optional[exp.Expression] 49 ) -> t.Optional[exp.Expression]: 50 expression = self._parse_bitwise() 51 if reverse_args: 52 this, expression = expression, this 53 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 54 55 return _parse_binary_range
58def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 59 # Default argument order is base, expression 60 this = seq_get(args, 0) 61 expression = seq_get(args, 1) 62 63 if expression: 64 if not dialect.LOG_BASE_FIRST: 65 this, expression = expression, this 66 return exp.Log(this=this, expression=expression) 67 68 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
88def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 89 def _builder(args: t.List, dialect: Dialect) -> E: 90 expression = expr_type( 91 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 92 ) 93 if len(args) > 2 and expr_type is exp.JSONExtract: 94 expression.set("expressions", args[2:]) 95 96 return expression 97 98 return _builder
101def build_mod(args: t.List) -> exp.Mod: 102 this = seq_get(args, 0) 103 expression = seq_get(args, 1) 104 105 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 106 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 107 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 108 109 return exp.Mod(this=this, expression=expression)
121def build_array_constructor( 122 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 123) -> exp.Expression: 124 array_exp = exp_class(expressions=args) 125 126 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 127 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 128 129 return array_exp
132def build_convert_timezone( 133 args: t.List, default_source_tz: t.Optional[str] = None 134) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 135 if len(args) == 2: 136 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 137 return exp.ConvertTimezone( 138 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 139 ) 140 141 return exp.ConvertTimezone.from_arg_list(args)
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 192 "CONCAT": lambda args, dialect: exp.Concat( 193 expressions=args, 194 safe=not dialect.STRICT_STRING_CONCAT, 195 coalesce=dialect.CONCAT_COALESCE, 196 ), 197 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 198 expressions=args, 199 safe=not dialect.STRICT_STRING_CONCAT, 200 coalesce=dialect.CONCAT_COALESCE, 201 ), 202 "CONVERT_TIMEZONE": build_convert_timezone, 203 "DATE_TO_DATE_STR": lambda args: exp.Cast( 204 this=seq_get(args, 0), 205 to=exp.DataType(this=exp.DataType.Type.TEXT), 206 ), 207 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 208 start=seq_get(args, 0), 209 end=seq_get(args, 1), 210 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 211 ), 212 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 213 "HEX": build_hex, 214 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 215 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 216 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 217 "LIKE": build_like, 218 "LOG": build_logarithm, 219 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 220 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 221 "LOWER": build_lower, 222 "LPAD": lambda args: build_pad(args), 223 "LEFTPAD": lambda args: build_pad(args), 224 "LTRIM": lambda args: build_trim(args), 225 "MOD": build_mod, 226 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 227 "RPAD": lambda args: build_pad(args, is_left=False), 228 "RTRIM": lambda args: build_trim(args, is_left=False), 229 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 230 if len(args) != 2 231 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 232 "TIME_TO_TIME_STR": lambda args: exp.Cast( 233 this=seq_get(args, 0), 234 to=exp.DataType(this=exp.DataType.Type.TEXT), 235 ), 236 "TO_HEX": build_hex, 237 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 238 this=exp.Cast( 239 this=seq_get(args, 0), 240 to=exp.DataType(this=exp.DataType.Type.TEXT), 241 ), 242 start=exp.Literal.number(1), 243 length=exp.Literal.number(10), 244 ), 245 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 246 "UPPER": build_upper, 247 "VAR_MAP": build_var_map, 248 } 249 250 NO_PAREN_FUNCTIONS = { 251 TokenType.CURRENT_DATE: exp.CurrentDate, 252 TokenType.CURRENT_DATETIME: exp.CurrentDate, 253 TokenType.CURRENT_TIME: exp.CurrentTime, 254 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 255 TokenType.CURRENT_USER: exp.CurrentUser, 256 } 257 258 STRUCT_TYPE_TOKENS = { 259 TokenType.NESTED, 260 TokenType.OBJECT, 261 TokenType.STRUCT, 262 } 263 264 NESTED_TYPE_TOKENS = { 265 TokenType.ARRAY, 266 TokenType.LIST, 267 TokenType.LOWCARDINALITY, 268 TokenType.MAP, 269 TokenType.NULLABLE, 270 *STRUCT_TYPE_TOKENS, 271 } 272 273 ENUM_TYPE_TOKENS = { 274 TokenType.ENUM, 275 TokenType.ENUM8, 276 TokenType.ENUM16, 277 } 278 279 AGGREGATE_TYPE_TOKENS = { 280 TokenType.AGGREGATEFUNCTION, 281 TokenType.SIMPLEAGGREGATEFUNCTION, 282 } 283 284 TYPE_TOKENS = { 285 TokenType.BIT, 286 TokenType.BOOLEAN, 287 TokenType.TINYINT, 288 TokenType.UTINYINT, 289 TokenType.SMALLINT, 290 TokenType.USMALLINT, 291 TokenType.INT, 292 TokenType.UINT, 293 TokenType.BIGINT, 294 TokenType.UBIGINT, 295 TokenType.INT128, 296 TokenType.UINT128, 297 TokenType.INT256, 298 TokenType.UINT256, 299 TokenType.MEDIUMINT, 300 TokenType.UMEDIUMINT, 301 TokenType.FIXEDSTRING, 302 TokenType.FLOAT, 303 TokenType.DOUBLE, 304 TokenType.CHAR, 305 TokenType.NCHAR, 306 TokenType.VARCHAR, 307 TokenType.NVARCHAR, 308 TokenType.BPCHAR, 309 TokenType.TEXT, 310 TokenType.MEDIUMTEXT, 311 TokenType.LONGTEXT, 312 TokenType.MEDIUMBLOB, 313 TokenType.LONGBLOB, 314 TokenType.BINARY, 315 TokenType.VARBINARY, 316 TokenType.JSON, 317 TokenType.JSONB, 318 TokenType.INTERVAL, 319 TokenType.TINYBLOB, 320 TokenType.TINYTEXT, 321 TokenType.TIME, 322 TokenType.TIMETZ, 323 TokenType.TIMESTAMP, 324 TokenType.TIMESTAMP_S, 325 TokenType.TIMESTAMP_MS, 326 TokenType.TIMESTAMP_NS, 327 TokenType.TIMESTAMPTZ, 328 TokenType.TIMESTAMPLTZ, 329 TokenType.TIMESTAMPNTZ, 330 TokenType.DATETIME, 331 TokenType.DATETIME64, 332 TokenType.DATE, 333 TokenType.DATE32, 334 TokenType.INT4RANGE, 335 TokenType.INT4MULTIRANGE, 336 TokenType.INT8RANGE, 337 TokenType.INT8MULTIRANGE, 338 TokenType.NUMRANGE, 339 TokenType.NUMMULTIRANGE, 340 TokenType.TSRANGE, 341 TokenType.TSMULTIRANGE, 342 TokenType.TSTZRANGE, 343 TokenType.TSTZMULTIRANGE, 344 TokenType.DATERANGE, 345 TokenType.DATEMULTIRANGE, 346 TokenType.DECIMAL, 347 TokenType.DECIMAL32, 348 TokenType.DECIMAL64, 349 TokenType.DECIMAL128, 350 TokenType.UDECIMAL, 351 TokenType.BIGDECIMAL, 352 TokenType.UUID, 353 TokenType.GEOGRAPHY, 354 TokenType.GEOMETRY, 355 TokenType.HLLSKETCH, 356 TokenType.HSTORE, 357 TokenType.PSEUDO_TYPE, 358 TokenType.SUPER, 359 TokenType.SERIAL, 360 TokenType.SMALLSERIAL, 361 TokenType.BIGSERIAL, 362 TokenType.XML, 363 TokenType.YEAR, 364 TokenType.UNIQUEIDENTIFIER, 365 TokenType.USERDEFINED, 366 TokenType.MONEY, 367 TokenType.SMALLMONEY, 368 TokenType.ROWVERSION, 369 TokenType.IMAGE, 370 TokenType.VARIANT, 371 TokenType.VECTOR, 372 TokenType.OBJECT, 373 TokenType.OBJECT_IDENTIFIER, 374 TokenType.INET, 375 TokenType.IPADDRESS, 376 TokenType.IPPREFIX, 377 TokenType.IPV4, 378 TokenType.IPV6, 379 TokenType.UNKNOWN, 380 TokenType.NULL, 381 TokenType.NAME, 382 TokenType.TDIGEST, 383 *ENUM_TYPE_TOKENS, 384 *NESTED_TYPE_TOKENS, 385 *AGGREGATE_TYPE_TOKENS, 386 } 387 388 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 389 TokenType.BIGINT: TokenType.UBIGINT, 390 TokenType.INT: TokenType.UINT, 391 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 392 TokenType.SMALLINT: TokenType.USMALLINT, 393 TokenType.TINYINT: TokenType.UTINYINT, 394 TokenType.DECIMAL: TokenType.UDECIMAL, 395 } 396 397 SUBQUERY_PREDICATES = { 398 TokenType.ANY: exp.Any, 399 TokenType.ALL: exp.All, 400 TokenType.EXISTS: exp.Exists, 401 TokenType.SOME: exp.Any, 402 } 403 404 RESERVED_TOKENS = { 405 *Tokenizer.SINGLE_TOKENS.values(), 406 TokenType.SELECT, 407 } - {TokenType.IDENTIFIER} 408 409 DB_CREATABLES = { 410 TokenType.DATABASE, 411 TokenType.DICTIONARY, 412 TokenType.MODEL, 413 TokenType.SCHEMA, 414 TokenType.SEQUENCE, 415 TokenType.STORAGE_INTEGRATION, 416 TokenType.TABLE, 417 TokenType.TAG, 418 TokenType.VIEW, 419 TokenType.WAREHOUSE, 420 TokenType.STREAMLIT, 421 } 422 423 CREATABLES = { 424 TokenType.COLUMN, 425 TokenType.CONSTRAINT, 426 TokenType.FOREIGN_KEY, 427 TokenType.FUNCTION, 428 TokenType.INDEX, 429 TokenType.PROCEDURE, 430 *DB_CREATABLES, 431 } 432 433 ALTERABLES = { 434 TokenType.TABLE, 435 TokenType.VIEW, 436 } 437 438 # Tokens that can represent identifiers 439 ID_VAR_TOKENS = { 440 TokenType.ALL, 441 TokenType.VAR, 442 TokenType.ANTI, 443 TokenType.APPLY, 444 TokenType.ASC, 445 TokenType.ASOF, 446 TokenType.AUTO_INCREMENT, 447 TokenType.BEGIN, 448 TokenType.BPCHAR, 449 TokenType.CACHE, 450 TokenType.CASE, 451 TokenType.COLLATE, 452 TokenType.COMMAND, 453 TokenType.COMMENT, 454 TokenType.COMMIT, 455 TokenType.CONSTRAINT, 456 TokenType.COPY, 457 TokenType.CUBE, 458 TokenType.DEFAULT, 459 TokenType.DELETE, 460 TokenType.DESC, 461 TokenType.DESCRIBE, 462 TokenType.DICTIONARY, 463 TokenType.DIV, 464 TokenType.END, 465 TokenType.EXECUTE, 466 TokenType.ESCAPE, 467 TokenType.FALSE, 468 TokenType.FIRST, 469 TokenType.FILTER, 470 TokenType.FINAL, 471 TokenType.FORMAT, 472 TokenType.FULL, 473 TokenType.IDENTIFIER, 474 TokenType.IS, 475 TokenType.ISNULL, 476 TokenType.INTERVAL, 477 TokenType.KEEP, 478 TokenType.KILL, 479 TokenType.LEFT, 480 TokenType.LOAD, 481 TokenType.MERGE, 482 TokenType.NATURAL, 483 TokenType.NEXT, 484 TokenType.OFFSET, 485 TokenType.OPERATOR, 486 TokenType.ORDINALITY, 487 TokenType.OVERLAPS, 488 TokenType.OVERWRITE, 489 TokenType.PARTITION, 490 TokenType.PERCENT, 491 TokenType.PIVOT, 492 TokenType.PRAGMA, 493 TokenType.RANGE, 494 TokenType.RECURSIVE, 495 TokenType.REFERENCES, 496 TokenType.REFRESH, 497 TokenType.RENAME, 498 TokenType.REPLACE, 499 TokenType.RIGHT, 500 TokenType.ROLLUP, 501 TokenType.ROW, 502 TokenType.ROWS, 503 TokenType.SEMI, 504 TokenType.SET, 505 TokenType.SETTINGS, 506 TokenType.SHOW, 507 TokenType.TEMPORARY, 508 TokenType.TOP, 509 TokenType.TRUE, 510 TokenType.TRUNCATE, 511 TokenType.UNIQUE, 512 TokenType.UNNEST, 513 TokenType.UNPIVOT, 514 TokenType.UPDATE, 515 TokenType.USE, 516 TokenType.VOLATILE, 517 TokenType.WINDOW, 518 *CREATABLES, 519 *SUBQUERY_PREDICATES, 520 *TYPE_TOKENS, 521 *NO_PAREN_FUNCTIONS, 522 } 523 524 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 525 526 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 527 TokenType.ANTI, 528 TokenType.APPLY, 529 TokenType.ASOF, 530 TokenType.FULL, 531 TokenType.LEFT, 532 TokenType.LOCK, 533 TokenType.NATURAL, 534 TokenType.OFFSET, 535 TokenType.RIGHT, 536 TokenType.SEMI, 537 TokenType.WINDOW, 538 } 539 540 ALIAS_TOKENS = ID_VAR_TOKENS 541 542 ARRAY_CONSTRUCTORS = { 543 "ARRAY": exp.Array, 544 "LIST": exp.List, 545 } 546 547 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 548 549 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 550 551 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 552 553 FUNC_TOKENS = { 554 TokenType.COLLATE, 555 TokenType.COMMAND, 556 TokenType.CURRENT_DATE, 557 TokenType.CURRENT_DATETIME, 558 TokenType.CURRENT_TIMESTAMP, 559 TokenType.CURRENT_TIME, 560 TokenType.CURRENT_USER, 561 TokenType.FILTER, 562 TokenType.FIRST, 563 TokenType.FORMAT, 564 TokenType.GLOB, 565 TokenType.IDENTIFIER, 566 TokenType.INDEX, 567 TokenType.ISNULL, 568 TokenType.ILIKE, 569 TokenType.INSERT, 570 TokenType.LIKE, 571 TokenType.MERGE, 572 TokenType.OFFSET, 573 TokenType.PRIMARY_KEY, 574 TokenType.RANGE, 575 TokenType.REPLACE, 576 TokenType.RLIKE, 577 TokenType.ROW, 578 TokenType.UNNEST, 579 TokenType.VAR, 580 TokenType.LEFT, 581 TokenType.RIGHT, 582 TokenType.SEQUENCE, 583 TokenType.DATE, 584 TokenType.DATETIME, 585 TokenType.TABLE, 586 TokenType.TIMESTAMP, 587 TokenType.TIMESTAMPTZ, 588 TokenType.TRUNCATE, 589 TokenType.WINDOW, 590 TokenType.XOR, 591 *TYPE_TOKENS, 592 *SUBQUERY_PREDICATES, 593 } 594 595 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 596 TokenType.AND: exp.And, 597 } 598 599 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 600 TokenType.COLON_EQ: exp.PropertyEQ, 601 } 602 603 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 604 TokenType.OR: exp.Or, 605 } 606 607 EQUALITY = { 608 TokenType.EQ: exp.EQ, 609 TokenType.NEQ: exp.NEQ, 610 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 611 } 612 613 COMPARISON = { 614 TokenType.GT: exp.GT, 615 TokenType.GTE: exp.GTE, 616 TokenType.LT: exp.LT, 617 TokenType.LTE: exp.LTE, 618 } 619 620 BITWISE = { 621 TokenType.AMP: exp.BitwiseAnd, 622 TokenType.CARET: exp.BitwiseXor, 623 TokenType.PIPE: exp.BitwiseOr, 624 } 625 626 TERM = { 627 TokenType.DASH: exp.Sub, 628 TokenType.PLUS: exp.Add, 629 TokenType.MOD: exp.Mod, 630 TokenType.COLLATE: exp.Collate, 631 } 632 633 FACTOR = { 634 TokenType.DIV: exp.IntDiv, 635 TokenType.LR_ARROW: exp.Distance, 636 TokenType.SLASH: exp.Div, 637 TokenType.STAR: exp.Mul, 638 } 639 640 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 641 642 TIMES = { 643 TokenType.TIME, 644 TokenType.TIMETZ, 645 } 646 647 TIMESTAMPS = { 648 TokenType.TIMESTAMP, 649 TokenType.TIMESTAMPTZ, 650 TokenType.TIMESTAMPLTZ, 651 *TIMES, 652 } 653 654 SET_OPERATIONS = { 655 TokenType.UNION, 656 TokenType.INTERSECT, 657 TokenType.EXCEPT, 658 } 659 660 JOIN_METHODS = { 661 TokenType.ASOF, 662 TokenType.NATURAL, 663 TokenType.POSITIONAL, 664 } 665 666 JOIN_SIDES = { 667 TokenType.LEFT, 668 TokenType.RIGHT, 669 TokenType.FULL, 670 } 671 672 JOIN_KINDS = { 673 TokenType.ANTI, 674 TokenType.CROSS, 675 TokenType.INNER, 676 TokenType.OUTER, 677 TokenType.SEMI, 678 TokenType.STRAIGHT_JOIN, 679 } 680 681 JOIN_HINTS: t.Set[str] = set() 682 683 LAMBDAS = { 684 TokenType.ARROW: lambda self, expressions: self.expression( 685 exp.Lambda, 686 this=self._replace_lambda( 687 self._parse_assignment(), 688 expressions, 689 ), 690 expressions=expressions, 691 ), 692 TokenType.FARROW: lambda self, expressions: self.expression( 693 exp.Kwarg, 694 this=exp.var(expressions[0].name), 695 expression=self._parse_assignment(), 696 ), 697 } 698 699 COLUMN_OPERATORS = { 700 TokenType.DOT: None, 701 TokenType.DCOLON: lambda self, this, to: self.expression( 702 exp.Cast if self.STRICT_CAST else exp.TryCast, 703 this=this, 704 to=to, 705 ), 706 TokenType.ARROW: lambda self, this, path: self.expression( 707 exp.JSONExtract, 708 this=this, 709 expression=self.dialect.to_json_path(path), 710 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 711 ), 712 TokenType.DARROW: lambda self, this, path: self.expression( 713 exp.JSONExtractScalar, 714 this=this, 715 expression=self.dialect.to_json_path(path), 716 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 717 ), 718 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 719 exp.JSONBExtract, 720 this=this, 721 expression=path, 722 ), 723 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 724 exp.JSONBExtractScalar, 725 this=this, 726 expression=path, 727 ), 728 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 729 exp.JSONBContains, 730 this=this, 731 expression=key, 732 ), 733 } 734 735 EXPRESSION_PARSERS = { 736 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 737 exp.Column: lambda self: self._parse_column(), 738 exp.Condition: lambda self: self._parse_assignment(), 739 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 740 exp.Expression: lambda self: self._parse_expression(), 741 exp.From: lambda self: self._parse_from(joins=True), 742 exp.Group: lambda self: self._parse_group(), 743 exp.Having: lambda self: self._parse_having(), 744 exp.Identifier: lambda self: self._parse_id_var(), 745 exp.Join: lambda self: self._parse_join(), 746 exp.Lambda: lambda self: self._parse_lambda(), 747 exp.Lateral: lambda self: self._parse_lateral(), 748 exp.Limit: lambda self: self._parse_limit(), 749 exp.Offset: lambda self: self._parse_offset(), 750 exp.Order: lambda self: self._parse_order(), 751 exp.Ordered: lambda self: self._parse_ordered(), 752 exp.Properties: lambda self: self._parse_properties(), 753 exp.Qualify: lambda self: self._parse_qualify(), 754 exp.Returning: lambda self: self._parse_returning(), 755 exp.Select: lambda self: self._parse_select(), 756 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 757 exp.Table: lambda self: self._parse_table_parts(), 758 exp.TableAlias: lambda self: self._parse_table_alias(), 759 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 760 exp.Where: lambda self: self._parse_where(), 761 exp.Window: lambda self: self._parse_named_window(), 762 exp.With: lambda self: self._parse_with(), 763 "JOIN_TYPE": lambda self: self._parse_join_parts(), 764 } 765 766 STATEMENT_PARSERS = { 767 TokenType.ALTER: lambda self: self._parse_alter(), 768 TokenType.BEGIN: lambda self: self._parse_transaction(), 769 TokenType.CACHE: lambda self: self._parse_cache(), 770 TokenType.COMMENT: lambda self: self._parse_comment(), 771 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 772 TokenType.COPY: lambda self: self._parse_copy(), 773 TokenType.CREATE: lambda self: self._parse_create(), 774 TokenType.DELETE: lambda self: self._parse_delete(), 775 TokenType.DESC: lambda self: self._parse_describe(), 776 TokenType.DESCRIBE: lambda self: self._parse_describe(), 777 TokenType.DROP: lambda self: self._parse_drop(), 778 TokenType.INSERT: lambda self: self._parse_insert(), 779 TokenType.KILL: lambda self: self._parse_kill(), 780 TokenType.LOAD: lambda self: self._parse_load(), 781 TokenType.MERGE: lambda self: self._parse_merge(), 782 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 783 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 784 TokenType.REFRESH: lambda self: self._parse_refresh(), 785 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 786 TokenType.SET: lambda self: self._parse_set(), 787 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 788 TokenType.UNCACHE: lambda self: self._parse_uncache(), 789 TokenType.UPDATE: lambda self: self._parse_update(), 790 TokenType.USE: lambda self: self.expression( 791 exp.Use, 792 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 793 this=self._parse_table(schema=False), 794 ), 795 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 796 } 797 798 UNARY_PARSERS = { 799 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 800 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 801 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 802 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 803 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 804 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 805 } 806 807 STRING_PARSERS = { 808 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 809 exp.RawString, this=token.text 810 ), 811 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 812 exp.National, this=token.text 813 ), 814 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 815 TokenType.STRING: lambda self, token: self.expression( 816 exp.Literal, this=token.text, is_string=True 817 ), 818 TokenType.UNICODE_STRING: lambda self, token: self.expression( 819 exp.UnicodeString, 820 this=token.text, 821 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 822 ), 823 } 824 825 NUMERIC_PARSERS = { 826 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 827 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 828 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 829 TokenType.NUMBER: lambda self, token: self.expression( 830 exp.Literal, this=token.text, is_string=False 831 ), 832 } 833 834 PRIMARY_PARSERS = { 835 **STRING_PARSERS, 836 **NUMERIC_PARSERS, 837 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 838 TokenType.NULL: lambda self, _: self.expression(exp.Null), 839 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 840 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 841 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 842 TokenType.STAR: lambda self, _: self.expression( 843 exp.Star, 844 **{ 845 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 846 "replace": self._parse_star_op("REPLACE"), 847 "rename": self._parse_star_op("RENAME"), 848 }, 849 ), 850 } 851 852 PLACEHOLDER_PARSERS = { 853 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 854 TokenType.PARAMETER: lambda self: self._parse_parameter(), 855 TokenType.COLON: lambda self: ( 856 self.expression(exp.Placeholder, this=self._prev.text) 857 if self._match_set(self.ID_VAR_TOKENS) 858 else None 859 ), 860 } 861 862 RANGE_PARSERS = { 863 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 864 TokenType.GLOB: binary_range_parser(exp.Glob), 865 TokenType.ILIKE: binary_range_parser(exp.ILike), 866 TokenType.IN: lambda self, this: self._parse_in(this), 867 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 868 TokenType.IS: lambda self, this: self._parse_is(this), 869 TokenType.LIKE: binary_range_parser(exp.Like), 870 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 871 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 872 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 873 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 874 } 875 876 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 877 "ALLOWED_VALUES": lambda self: self.expression( 878 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 879 ), 880 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 881 "AUTO": lambda self: self._parse_auto_property(), 882 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 883 "BACKUP": lambda self: self.expression( 884 exp.BackupProperty, this=self._parse_var(any_token=True) 885 ), 886 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 887 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 888 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 889 "CHECKSUM": lambda self: self._parse_checksum(), 890 "CLUSTER BY": lambda self: self._parse_cluster(), 891 "CLUSTERED": lambda self: self._parse_clustered_by(), 892 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 893 exp.CollateProperty, **kwargs 894 ), 895 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 896 "CONTAINS": lambda self: self._parse_contains_property(), 897 "COPY": lambda self: self._parse_copy_property(), 898 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 899 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 900 "DEFINER": lambda self: self._parse_definer(), 901 "DETERMINISTIC": lambda self: self.expression( 902 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 903 ), 904 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 905 "DUPLICATE": lambda self: self._parse_duplicate(), 906 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 907 "DISTKEY": lambda self: self._parse_distkey(), 908 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 909 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 910 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 911 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 912 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 913 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 914 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 915 "FREESPACE": lambda self: self._parse_freespace(), 916 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 917 "HEAP": lambda self: self.expression(exp.HeapProperty), 918 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 919 "IMMUTABLE": lambda self: self.expression( 920 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 921 ), 922 "INHERITS": lambda self: self.expression( 923 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 924 ), 925 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 926 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 927 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 928 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 929 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 930 "LIKE": lambda self: self._parse_create_like(), 931 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 932 "LOCK": lambda self: self._parse_locking(), 933 "LOCKING": lambda self: self._parse_locking(), 934 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 935 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 936 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 937 "MODIFIES": lambda self: self._parse_modifies_property(), 938 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 939 "NO": lambda self: self._parse_no_property(), 940 "ON": lambda self: self._parse_on_property(), 941 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 942 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 943 "PARTITION": lambda self: self._parse_partitioned_of(), 944 "PARTITION BY": lambda self: self._parse_partitioned_by(), 945 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 946 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 947 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 948 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 949 "READS": lambda self: self._parse_reads_property(), 950 "REMOTE": lambda self: self._parse_remote_with_connection(), 951 "RETURNS": lambda self: self._parse_returns(), 952 "STRICT": lambda self: self.expression(exp.StrictProperty), 953 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 954 "ROW": lambda self: self._parse_row(), 955 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 956 "SAMPLE": lambda self: self.expression( 957 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 958 ), 959 "SECURE": lambda self: self.expression(exp.SecureProperty), 960 "SECURITY": lambda self: self._parse_security(), 961 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 962 "SETTINGS": lambda self: self._parse_settings_property(), 963 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 964 "SORTKEY": lambda self: self._parse_sortkey(), 965 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 966 "STABLE": lambda self: self.expression( 967 exp.StabilityProperty, this=exp.Literal.string("STABLE") 968 ), 969 "STORED": lambda self: self._parse_stored(), 970 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 971 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 972 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 973 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 974 "TO": lambda self: self._parse_to_table(), 975 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 976 "TRANSFORM": lambda self: self.expression( 977 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 978 ), 979 "TTL": lambda self: self._parse_ttl(), 980 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 981 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 982 "VOLATILE": lambda self: self._parse_volatile_property(), 983 "WITH": lambda self: self._parse_with_property(), 984 } 985 986 CONSTRAINT_PARSERS = { 987 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 988 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 989 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 990 "CHARACTER SET": lambda self: self.expression( 991 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 992 ), 993 "CHECK": lambda self: self.expression( 994 exp.CheckColumnConstraint, 995 this=self._parse_wrapped(self._parse_assignment), 996 enforced=self._match_text_seq("ENFORCED"), 997 ), 998 "COLLATE": lambda self: self.expression( 999 exp.CollateColumnConstraint, 1000 this=self._parse_identifier() or self._parse_column(), 1001 ), 1002 "COMMENT": lambda self: self.expression( 1003 exp.CommentColumnConstraint, this=self._parse_string() 1004 ), 1005 "COMPRESS": lambda self: self._parse_compress(), 1006 "CLUSTERED": lambda self: self.expression( 1007 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1008 ), 1009 "NONCLUSTERED": lambda self: self.expression( 1010 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1011 ), 1012 "DEFAULT": lambda self: self.expression( 1013 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1014 ), 1015 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1016 "EPHEMERAL": lambda self: self.expression( 1017 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1018 ), 1019 "EXCLUDE": lambda self: self.expression( 1020 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1021 ), 1022 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1023 "FORMAT": lambda self: self.expression( 1024 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1025 ), 1026 "GENERATED": lambda self: self._parse_generated_as_identity(), 1027 "IDENTITY": lambda self: self._parse_auto_increment(), 1028 "INLINE": lambda self: self._parse_inline(), 1029 "LIKE": lambda self: self._parse_create_like(), 1030 "NOT": lambda self: self._parse_not_constraint(), 1031 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1032 "ON": lambda self: ( 1033 self._match(TokenType.UPDATE) 1034 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1035 ) 1036 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1037 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1038 "PERIOD": lambda self: self._parse_period_for_system_time(), 1039 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1040 "REFERENCES": lambda self: self._parse_references(match=False), 1041 "TITLE": lambda self: self.expression( 1042 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1043 ), 1044 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1045 "UNIQUE": lambda self: self._parse_unique(), 1046 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1047 "WITH": lambda self: self.expression( 1048 exp.Properties, expressions=self._parse_wrapped_properties() 1049 ), 1050 } 1051 1052 ALTER_PARSERS = { 1053 "ADD": lambda self: self._parse_alter_table_add(), 1054 "ALTER": lambda self: self._parse_alter_table_alter(), 1055 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1056 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1057 "DROP": lambda self: self._parse_alter_table_drop(), 1058 "RENAME": lambda self: self._parse_alter_table_rename(), 1059 "SET": lambda self: self._parse_alter_table_set(), 1060 "AS": lambda self: self._parse_select(), 1061 } 1062 1063 ALTER_ALTER_PARSERS = { 1064 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1065 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1066 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1067 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1068 } 1069 1070 SCHEMA_UNNAMED_CONSTRAINTS = { 1071 "CHECK", 1072 "EXCLUDE", 1073 "FOREIGN KEY", 1074 "LIKE", 1075 "PERIOD", 1076 "PRIMARY KEY", 1077 "UNIQUE", 1078 } 1079 1080 NO_PAREN_FUNCTION_PARSERS = { 1081 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1082 "CASE": lambda self: self._parse_case(), 1083 "CONNECT_BY_ROOT": lambda self: self.expression( 1084 exp.ConnectByRoot, this=self._parse_column() 1085 ), 1086 "IF": lambda self: self._parse_if(), 1087 "NEXT": lambda self: self._parse_next_value_for(), 1088 } 1089 1090 INVALID_FUNC_NAME_TOKENS = { 1091 TokenType.IDENTIFIER, 1092 TokenType.STRING, 1093 } 1094 1095 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1096 1097 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1098 1099 FUNCTION_PARSERS = { 1100 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1101 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1102 "DECODE": lambda self: self._parse_decode(), 1103 "EXTRACT": lambda self: self._parse_extract(), 1104 "GAP_FILL": lambda self: self._parse_gap_fill(), 1105 "JSON_OBJECT": lambda self: self._parse_json_object(), 1106 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1107 "JSON_TABLE": lambda self: self._parse_json_table(), 1108 "MATCH": lambda self: self._parse_match_against(), 1109 "NORMALIZE": lambda self: self._parse_normalize(), 1110 "OPENJSON": lambda self: self._parse_open_json(), 1111 "POSITION": lambda self: self._parse_position(), 1112 "PREDICT": lambda self: self._parse_predict(), 1113 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1114 "STRING_AGG": lambda self: self._parse_string_agg(), 1115 "SUBSTRING": lambda self: self._parse_substring(), 1116 "TRIM": lambda self: self._parse_trim(), 1117 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1118 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1119 } 1120 1121 QUERY_MODIFIER_PARSERS = { 1122 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1123 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1124 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1125 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1126 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1127 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1128 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1129 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1130 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1131 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1132 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1133 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1134 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1135 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1136 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1137 TokenType.CLUSTER_BY: lambda self: ( 1138 "cluster", 1139 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1140 ), 1141 TokenType.DISTRIBUTE_BY: lambda self: ( 1142 "distribute", 1143 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1144 ), 1145 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1146 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1147 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1148 } 1149 1150 SET_PARSERS = { 1151 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1152 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1153 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1154 "TRANSACTION": lambda self: self._parse_set_transaction(), 1155 } 1156 1157 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1158 1159 TYPE_LITERAL_PARSERS = { 1160 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1161 } 1162 1163 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1164 1165 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1166 1167 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1168 1169 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1170 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1171 "ISOLATION": ( 1172 ("LEVEL", "REPEATABLE", "READ"), 1173 ("LEVEL", "READ", "COMMITTED"), 1174 ("LEVEL", "READ", "UNCOMITTED"), 1175 ("LEVEL", "SERIALIZABLE"), 1176 ), 1177 "READ": ("WRITE", "ONLY"), 1178 } 1179 1180 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1181 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1182 ) 1183 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1184 1185 CREATE_SEQUENCE: OPTIONS_TYPE = { 1186 "SCALE": ("EXTEND", "NOEXTEND"), 1187 "SHARD": ("EXTEND", "NOEXTEND"), 1188 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1189 **dict.fromkeys( 1190 ( 1191 "SESSION", 1192 "GLOBAL", 1193 "KEEP", 1194 "NOKEEP", 1195 "ORDER", 1196 "NOORDER", 1197 "NOCACHE", 1198 "CYCLE", 1199 "NOCYCLE", 1200 "NOMINVALUE", 1201 "NOMAXVALUE", 1202 "NOSCALE", 1203 "NOSHARD", 1204 ), 1205 tuple(), 1206 ), 1207 } 1208 1209 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1210 1211 USABLES: OPTIONS_TYPE = dict.fromkeys( 1212 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1213 ) 1214 1215 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1216 1217 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1218 "TYPE": ("EVOLUTION",), 1219 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1220 } 1221 1222 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1223 "NOT": ("ENFORCED",), 1224 "MATCH": ( 1225 "FULL", 1226 "PARTIAL", 1227 "SIMPLE", 1228 ), 1229 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1230 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1231 } 1232 1233 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1234 1235 CLONE_KEYWORDS = {"CLONE", "COPY"} 1236 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1237 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1238 1239 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1240 1241 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1242 1243 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1244 1245 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1246 1247 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1248 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1249 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1250 1251 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1252 1253 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1254 1255 ADD_CONSTRAINT_TOKENS = { 1256 TokenType.CONSTRAINT, 1257 TokenType.FOREIGN_KEY, 1258 TokenType.INDEX, 1259 TokenType.KEY, 1260 TokenType.PRIMARY_KEY, 1261 TokenType.UNIQUE, 1262 } 1263 1264 DISTINCT_TOKENS = {TokenType.DISTINCT} 1265 1266 NULL_TOKENS = {TokenType.NULL} 1267 1268 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1269 1270 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1271 1272 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1273 1274 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1275 1276 ODBC_DATETIME_LITERALS = { 1277 "d": exp.Date, 1278 "t": exp.Time, 1279 "ts": exp.Timestamp, 1280 } 1281 1282 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1283 1284 STRICT_CAST = True 1285 1286 PREFIXED_PIVOT_COLUMNS = False 1287 IDENTIFY_PIVOT_STRINGS = False 1288 1289 LOG_DEFAULTS_TO_LN = False 1290 1291 # Whether ADD is present for each column added by ALTER TABLE 1292 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1293 1294 # Whether the table sample clause expects CSV syntax 1295 TABLESAMPLE_CSV = False 1296 1297 # The default method used for table sampling 1298 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1299 1300 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1301 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1302 1303 # Whether the TRIM function expects the characters to trim as its first argument 1304 TRIM_PATTERN_FIRST = False 1305 1306 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1307 STRING_ALIASES = False 1308 1309 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1310 MODIFIERS_ATTACHED_TO_SET_OP = True 1311 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1312 1313 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1314 NO_PAREN_IF_COMMANDS = True 1315 1316 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1317 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1318 1319 # Whether the `:` operator is used to extract a value from a VARIANT column 1320 COLON_IS_VARIANT_EXTRACT = False 1321 1322 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1323 # If this is True and '(' is not found, the keyword will be treated as an identifier 1324 VALUES_FOLLOWED_BY_PAREN = True 1325 1326 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1327 SUPPORTS_IMPLICIT_UNNEST = False 1328 1329 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1330 INTERVAL_SPANS = True 1331 1332 # Whether a PARTITION clause can follow a table reference 1333 SUPPORTS_PARTITION_SELECTION = False 1334 1335 __slots__ = ( 1336 "error_level", 1337 "error_message_context", 1338 "max_errors", 1339 "dialect", 1340 "sql", 1341 "errors", 1342 "_tokens", 1343 "_index", 1344 "_curr", 1345 "_next", 1346 "_prev", 1347 "_prev_comments", 1348 ) 1349 1350 # Autofilled 1351 SHOW_TRIE: t.Dict = {} 1352 SET_TRIE: t.Dict = {} 1353 1354 def __init__( 1355 self, 1356 error_level: t.Optional[ErrorLevel] = None, 1357 error_message_context: int = 100, 1358 max_errors: int = 3, 1359 dialect: DialectType = None, 1360 ): 1361 from sqlglot.dialects import Dialect 1362 1363 self.error_level = error_level or ErrorLevel.IMMEDIATE 1364 self.error_message_context = error_message_context 1365 self.max_errors = max_errors 1366 self.dialect = Dialect.get_or_raise(dialect) 1367 self.reset() 1368 1369 def reset(self): 1370 self.sql = "" 1371 self.errors = [] 1372 self._tokens = [] 1373 self._index = 0 1374 self._curr = None 1375 self._next = None 1376 self._prev = None 1377 self._prev_comments = None 1378 1379 def parse( 1380 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1381 ) -> t.List[t.Optional[exp.Expression]]: 1382 """ 1383 Parses a list of tokens and returns a list of syntax trees, one tree 1384 per parsed SQL statement. 1385 1386 Args: 1387 raw_tokens: The list of tokens. 1388 sql: The original SQL string, used to produce helpful debug messages. 1389 1390 Returns: 1391 The list of the produced syntax trees. 1392 """ 1393 return self._parse( 1394 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1395 ) 1396 1397 def parse_into( 1398 self, 1399 expression_types: exp.IntoType, 1400 raw_tokens: t.List[Token], 1401 sql: t.Optional[str] = None, 1402 ) -> t.List[t.Optional[exp.Expression]]: 1403 """ 1404 Parses a list of tokens into a given Expression type. If a collection of Expression 1405 types is given instead, this method will try to parse the token list into each one 1406 of them, stopping at the first for which the parsing succeeds. 1407 1408 Args: 1409 expression_types: The expression type(s) to try and parse the token list into. 1410 raw_tokens: The list of tokens. 1411 sql: The original SQL string, used to produce helpful debug messages. 1412 1413 Returns: 1414 The target Expression. 1415 """ 1416 errors = [] 1417 for expression_type in ensure_list(expression_types): 1418 parser = self.EXPRESSION_PARSERS.get(expression_type) 1419 if not parser: 1420 raise TypeError(f"No parser registered for {expression_type}") 1421 1422 try: 1423 return self._parse(parser, raw_tokens, sql) 1424 except ParseError as e: 1425 e.errors[0]["into_expression"] = expression_type 1426 errors.append(e) 1427 1428 raise ParseError( 1429 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1430 errors=merge_errors(errors), 1431 ) from errors[-1] 1432 1433 def _parse( 1434 self, 1435 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1436 raw_tokens: t.List[Token], 1437 sql: t.Optional[str] = None, 1438 ) -> t.List[t.Optional[exp.Expression]]: 1439 self.reset() 1440 self.sql = sql or "" 1441 1442 total = len(raw_tokens) 1443 chunks: t.List[t.List[Token]] = [[]] 1444 1445 for i, token in enumerate(raw_tokens): 1446 if token.token_type == TokenType.SEMICOLON: 1447 if token.comments: 1448 chunks.append([token]) 1449 1450 if i < total - 1: 1451 chunks.append([]) 1452 else: 1453 chunks[-1].append(token) 1454 1455 expressions = [] 1456 1457 for tokens in chunks: 1458 self._index = -1 1459 self._tokens = tokens 1460 self._advance() 1461 1462 expressions.append(parse_method(self)) 1463 1464 if self._index < len(self._tokens): 1465 self.raise_error("Invalid expression / Unexpected token") 1466 1467 self.check_errors() 1468 1469 return expressions 1470 1471 def check_errors(self) -> None: 1472 """Logs or raises any found errors, depending on the chosen error level setting.""" 1473 if self.error_level == ErrorLevel.WARN: 1474 for error in self.errors: 1475 logger.error(str(error)) 1476 elif self.error_level == ErrorLevel.RAISE and self.errors: 1477 raise ParseError( 1478 concat_messages(self.errors, self.max_errors), 1479 errors=merge_errors(self.errors), 1480 ) 1481 1482 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1483 """ 1484 Appends an error in the list of recorded errors or raises it, depending on the chosen 1485 error level setting. 1486 """ 1487 token = token or self._curr or self._prev or Token.string("") 1488 start = token.start 1489 end = token.end + 1 1490 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1491 highlight = self.sql[start:end] 1492 end_context = self.sql[end : end + self.error_message_context] 1493 1494 error = ParseError.new( 1495 f"{message}. Line {token.line}, Col: {token.col}.\n" 1496 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1497 description=message, 1498 line=token.line, 1499 col=token.col, 1500 start_context=start_context, 1501 highlight=highlight, 1502 end_context=end_context, 1503 ) 1504 1505 if self.error_level == ErrorLevel.IMMEDIATE: 1506 raise error 1507 1508 self.errors.append(error) 1509 1510 def expression( 1511 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1512 ) -> E: 1513 """ 1514 Creates a new, validated Expression. 1515 1516 Args: 1517 exp_class: The expression class to instantiate. 1518 comments: An optional list of comments to attach to the expression. 1519 kwargs: The arguments to set for the expression along with their respective values. 1520 1521 Returns: 1522 The target expression. 1523 """ 1524 instance = exp_class(**kwargs) 1525 instance.add_comments(comments) if comments else self._add_comments(instance) 1526 return self.validate_expression(instance) 1527 1528 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1529 if expression and self._prev_comments: 1530 expression.add_comments(self._prev_comments) 1531 self._prev_comments = None 1532 1533 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1534 """ 1535 Validates an Expression, making sure that all its mandatory arguments are set. 1536 1537 Args: 1538 expression: The expression to validate. 1539 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1540 1541 Returns: 1542 The validated expression. 1543 """ 1544 if self.error_level != ErrorLevel.IGNORE: 1545 for error_message in expression.error_messages(args): 1546 self.raise_error(error_message) 1547 1548 return expression 1549 1550 def _find_sql(self, start: Token, end: Token) -> str: 1551 return self.sql[start.start : end.end + 1] 1552 1553 def _is_connected(self) -> bool: 1554 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1555 1556 def _advance(self, times: int = 1) -> None: 1557 self._index += times 1558 self._curr = seq_get(self._tokens, self._index) 1559 self._next = seq_get(self._tokens, self._index + 1) 1560 1561 if self._index > 0: 1562 self._prev = self._tokens[self._index - 1] 1563 self._prev_comments = self._prev.comments 1564 else: 1565 self._prev = None 1566 self._prev_comments = None 1567 1568 def _retreat(self, index: int) -> None: 1569 if index != self._index: 1570 self._advance(index - self._index) 1571 1572 def _warn_unsupported(self) -> None: 1573 if len(self._tokens) <= 1: 1574 return 1575 1576 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1577 # interested in emitting a warning for the one being currently processed. 1578 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1579 1580 logger.warning( 1581 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1582 ) 1583 1584 def _parse_command(self) -> exp.Command: 1585 self._warn_unsupported() 1586 return self.expression( 1587 exp.Command, 1588 comments=self._prev_comments, 1589 this=self._prev.text.upper(), 1590 expression=self._parse_string(), 1591 ) 1592 1593 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1594 """ 1595 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1596 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1597 solve this by setting & resetting the parser state accordingly 1598 """ 1599 index = self._index 1600 error_level = self.error_level 1601 1602 self.error_level = ErrorLevel.IMMEDIATE 1603 try: 1604 this = parse_method() 1605 except ParseError: 1606 this = None 1607 finally: 1608 if not this or retreat: 1609 self._retreat(index) 1610 self.error_level = error_level 1611 1612 return this 1613 1614 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1615 start = self._prev 1616 exists = self._parse_exists() if allow_exists else None 1617 1618 self._match(TokenType.ON) 1619 1620 materialized = self._match_text_seq("MATERIALIZED") 1621 kind = self._match_set(self.CREATABLES) and self._prev 1622 if not kind: 1623 return self._parse_as_command(start) 1624 1625 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1626 this = self._parse_user_defined_function(kind=kind.token_type) 1627 elif kind.token_type == TokenType.TABLE: 1628 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1629 elif kind.token_type == TokenType.COLUMN: 1630 this = self._parse_column() 1631 else: 1632 this = self._parse_id_var() 1633 1634 self._match(TokenType.IS) 1635 1636 return self.expression( 1637 exp.Comment, 1638 this=this, 1639 kind=kind.text, 1640 expression=self._parse_string(), 1641 exists=exists, 1642 materialized=materialized, 1643 ) 1644 1645 def _parse_to_table( 1646 self, 1647 ) -> exp.ToTableProperty: 1648 table = self._parse_table_parts(schema=True) 1649 return self.expression(exp.ToTableProperty, this=table) 1650 1651 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1652 def _parse_ttl(self) -> exp.Expression: 1653 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1654 this = self._parse_bitwise() 1655 1656 if self._match_text_seq("DELETE"): 1657 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1658 if self._match_text_seq("RECOMPRESS"): 1659 return self.expression( 1660 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1661 ) 1662 if self._match_text_seq("TO", "DISK"): 1663 return self.expression( 1664 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1665 ) 1666 if self._match_text_seq("TO", "VOLUME"): 1667 return self.expression( 1668 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1669 ) 1670 1671 return this 1672 1673 expressions = self._parse_csv(_parse_ttl_action) 1674 where = self._parse_where() 1675 group = self._parse_group() 1676 1677 aggregates = None 1678 if group and self._match(TokenType.SET): 1679 aggregates = self._parse_csv(self._parse_set_item) 1680 1681 return self.expression( 1682 exp.MergeTreeTTL, 1683 expressions=expressions, 1684 where=where, 1685 group=group, 1686 aggregates=aggregates, 1687 ) 1688 1689 def _parse_statement(self) -> t.Optional[exp.Expression]: 1690 if self._curr is None: 1691 return None 1692 1693 if self._match_set(self.STATEMENT_PARSERS): 1694 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1695 1696 if self._match_set(self.dialect.tokenizer.COMMANDS): 1697 return self._parse_command() 1698 1699 expression = self._parse_expression() 1700 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1701 return self._parse_query_modifiers(expression) 1702 1703 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1704 start = self._prev 1705 temporary = self._match(TokenType.TEMPORARY) 1706 materialized = self._match_text_seq("MATERIALIZED") 1707 1708 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1709 if not kind: 1710 return self._parse_as_command(start) 1711 1712 concurrently = self._match_text_seq("CONCURRENTLY") 1713 if_exists = exists or self._parse_exists() 1714 table = self._parse_table_parts( 1715 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1716 ) 1717 1718 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1719 1720 if self._match(TokenType.L_PAREN, advance=False): 1721 expressions = self._parse_wrapped_csv(self._parse_types) 1722 else: 1723 expressions = None 1724 1725 return self.expression( 1726 exp.Drop, 1727 comments=start.comments, 1728 exists=if_exists, 1729 this=table, 1730 expressions=expressions, 1731 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1732 temporary=temporary, 1733 materialized=materialized, 1734 cascade=self._match_text_seq("CASCADE"), 1735 constraints=self._match_text_seq("CONSTRAINTS"), 1736 purge=self._match_text_seq("PURGE"), 1737 cluster=cluster, 1738 concurrently=concurrently, 1739 ) 1740 1741 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1742 return ( 1743 self._match_text_seq("IF") 1744 and (not not_ or self._match(TokenType.NOT)) 1745 and self._match(TokenType.EXISTS) 1746 ) 1747 1748 def _parse_create(self) -> exp.Create | exp.Command: 1749 # Note: this can't be None because we've matched a statement parser 1750 start = self._prev 1751 comments = self._prev_comments 1752 1753 replace = ( 1754 start.token_type == TokenType.REPLACE 1755 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1756 or self._match_pair(TokenType.OR, TokenType.ALTER) 1757 ) 1758 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1759 1760 unique = self._match(TokenType.UNIQUE) 1761 1762 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1763 clustered = True 1764 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1765 "COLUMNSTORE" 1766 ): 1767 clustered = False 1768 else: 1769 clustered = None 1770 1771 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1772 self._advance() 1773 1774 properties = None 1775 create_token = self._match_set(self.CREATABLES) and self._prev 1776 1777 if not create_token: 1778 # exp.Properties.Location.POST_CREATE 1779 properties = self._parse_properties() 1780 create_token = self._match_set(self.CREATABLES) and self._prev 1781 1782 if not properties or not create_token: 1783 return self._parse_as_command(start) 1784 1785 concurrently = self._match_text_seq("CONCURRENTLY") 1786 exists = self._parse_exists(not_=True) 1787 this = None 1788 expression: t.Optional[exp.Expression] = None 1789 indexes = None 1790 no_schema_binding = None 1791 begin = None 1792 end = None 1793 clone = None 1794 1795 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1796 nonlocal properties 1797 if properties and temp_props: 1798 properties.expressions.extend(temp_props.expressions) 1799 elif temp_props: 1800 properties = temp_props 1801 1802 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1803 this = self._parse_user_defined_function(kind=create_token.token_type) 1804 1805 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1806 extend_props(self._parse_properties()) 1807 1808 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1809 extend_props(self._parse_properties()) 1810 1811 if not expression: 1812 if self._match(TokenType.COMMAND): 1813 expression = self._parse_as_command(self._prev) 1814 else: 1815 begin = self._match(TokenType.BEGIN) 1816 return_ = self._match_text_seq("RETURN") 1817 1818 if self._match(TokenType.STRING, advance=False): 1819 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1820 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1821 expression = self._parse_string() 1822 extend_props(self._parse_properties()) 1823 else: 1824 expression = self._parse_statement() 1825 1826 end = self._match_text_seq("END") 1827 1828 if return_: 1829 expression = self.expression(exp.Return, this=expression) 1830 elif create_token.token_type == TokenType.INDEX: 1831 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1832 if not self._match(TokenType.ON): 1833 index = self._parse_id_var() 1834 anonymous = False 1835 else: 1836 index = None 1837 anonymous = True 1838 1839 this = self._parse_index(index=index, anonymous=anonymous) 1840 elif create_token.token_type in self.DB_CREATABLES: 1841 table_parts = self._parse_table_parts( 1842 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1843 ) 1844 1845 # exp.Properties.Location.POST_NAME 1846 self._match(TokenType.COMMA) 1847 extend_props(self._parse_properties(before=True)) 1848 1849 this = self._parse_schema(this=table_parts) 1850 1851 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1852 extend_props(self._parse_properties()) 1853 1854 self._match(TokenType.ALIAS) 1855 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1856 # exp.Properties.Location.POST_ALIAS 1857 extend_props(self._parse_properties()) 1858 1859 if create_token.token_type == TokenType.SEQUENCE: 1860 expression = self._parse_types() 1861 extend_props(self._parse_properties()) 1862 else: 1863 expression = self._parse_ddl_select() 1864 1865 if create_token.token_type == TokenType.TABLE: 1866 # exp.Properties.Location.POST_EXPRESSION 1867 extend_props(self._parse_properties()) 1868 1869 indexes = [] 1870 while True: 1871 index = self._parse_index() 1872 1873 # exp.Properties.Location.POST_INDEX 1874 extend_props(self._parse_properties()) 1875 if not index: 1876 break 1877 else: 1878 self._match(TokenType.COMMA) 1879 indexes.append(index) 1880 elif create_token.token_type == TokenType.VIEW: 1881 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1882 no_schema_binding = True 1883 1884 shallow = self._match_text_seq("SHALLOW") 1885 1886 if self._match_texts(self.CLONE_KEYWORDS): 1887 copy = self._prev.text.lower() == "copy" 1888 clone = self.expression( 1889 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1890 ) 1891 1892 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1893 return self._parse_as_command(start) 1894 1895 create_kind_text = create_token.text.upper() 1896 return self.expression( 1897 exp.Create, 1898 comments=comments, 1899 this=this, 1900 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1901 replace=replace, 1902 refresh=refresh, 1903 unique=unique, 1904 expression=expression, 1905 exists=exists, 1906 properties=properties, 1907 indexes=indexes, 1908 no_schema_binding=no_schema_binding, 1909 begin=begin, 1910 end=end, 1911 clone=clone, 1912 concurrently=concurrently, 1913 clustered=clustered, 1914 ) 1915 1916 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1917 seq = exp.SequenceProperties() 1918 1919 options = [] 1920 index = self._index 1921 1922 while self._curr: 1923 self._match(TokenType.COMMA) 1924 if self._match_text_seq("INCREMENT"): 1925 self._match_text_seq("BY") 1926 self._match_text_seq("=") 1927 seq.set("increment", self._parse_term()) 1928 elif self._match_text_seq("MINVALUE"): 1929 seq.set("minvalue", self._parse_term()) 1930 elif self._match_text_seq("MAXVALUE"): 1931 seq.set("maxvalue", self._parse_term()) 1932 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1933 self._match_text_seq("=") 1934 seq.set("start", self._parse_term()) 1935 elif self._match_text_seq("CACHE"): 1936 # T-SQL allows empty CACHE which is initialized dynamically 1937 seq.set("cache", self._parse_number() or True) 1938 elif self._match_text_seq("OWNED", "BY"): 1939 # "OWNED BY NONE" is the default 1940 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1941 else: 1942 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1943 if opt: 1944 options.append(opt) 1945 else: 1946 break 1947 1948 seq.set("options", options if options else None) 1949 return None if self._index == index else seq 1950 1951 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1952 # only used for teradata currently 1953 self._match(TokenType.COMMA) 1954 1955 kwargs = { 1956 "no": self._match_text_seq("NO"), 1957 "dual": self._match_text_seq("DUAL"), 1958 "before": self._match_text_seq("BEFORE"), 1959 "default": self._match_text_seq("DEFAULT"), 1960 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1961 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1962 "after": self._match_text_seq("AFTER"), 1963 "minimum": self._match_texts(("MIN", "MINIMUM")), 1964 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1965 } 1966 1967 if self._match_texts(self.PROPERTY_PARSERS): 1968 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1969 try: 1970 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1971 except TypeError: 1972 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1973 1974 return None 1975 1976 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1977 return self._parse_wrapped_csv(self._parse_property) 1978 1979 def _parse_property(self) -> t.Optional[exp.Expression]: 1980 if self._match_texts(self.PROPERTY_PARSERS): 1981 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1982 1983 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1984 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1985 1986 if self._match_text_seq("COMPOUND", "SORTKEY"): 1987 return self._parse_sortkey(compound=True) 1988 1989 if self._match_text_seq("SQL", "SECURITY"): 1990 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1991 1992 index = self._index 1993 key = self._parse_column() 1994 1995 if not self._match(TokenType.EQ): 1996 self._retreat(index) 1997 return self._parse_sequence_properties() 1998 1999 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2000 if isinstance(key, exp.Column): 2001 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2002 2003 value = self._parse_bitwise() or self._parse_var(any_token=True) 2004 2005 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2006 if isinstance(value, exp.Column): 2007 value = exp.var(value.name) 2008 2009 return self.expression(exp.Property, this=key, value=value) 2010 2011 def _parse_stored(self) -> exp.FileFormatProperty: 2012 self._match(TokenType.ALIAS) 2013 2014 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2015 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2016 2017 return self.expression( 2018 exp.FileFormatProperty, 2019 this=( 2020 self.expression( 2021 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2022 ) 2023 if input_format or output_format 2024 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2025 ), 2026 ) 2027 2028 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2029 field = self._parse_field() 2030 if isinstance(field, exp.Identifier) and not field.quoted: 2031 field = exp.var(field) 2032 2033 return field 2034 2035 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2036 self._match(TokenType.EQ) 2037 self._match(TokenType.ALIAS) 2038 2039 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2040 2041 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2042 properties = [] 2043 while True: 2044 if before: 2045 prop = self._parse_property_before() 2046 else: 2047 prop = self._parse_property() 2048 if not prop: 2049 break 2050 for p in ensure_list(prop): 2051 properties.append(p) 2052 2053 if properties: 2054 return self.expression(exp.Properties, expressions=properties) 2055 2056 return None 2057 2058 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2059 return self.expression( 2060 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2061 ) 2062 2063 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2064 if self._match_texts(("DEFINER", "INVOKER")): 2065 security_specifier = self._prev.text.upper() 2066 return self.expression(exp.SecurityProperty, this=security_specifier) 2067 return None 2068 2069 def _parse_settings_property(self) -> exp.SettingsProperty: 2070 return self.expression( 2071 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2072 ) 2073 2074 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2075 if self._index >= 2: 2076 pre_volatile_token = self._tokens[self._index - 2] 2077 else: 2078 pre_volatile_token = None 2079 2080 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2081 return exp.VolatileProperty() 2082 2083 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2084 2085 def _parse_retention_period(self) -> exp.Var: 2086 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2087 number = self._parse_number() 2088 number_str = f"{number} " if number else "" 2089 unit = self._parse_var(any_token=True) 2090 return exp.var(f"{number_str}{unit}") 2091 2092 def _parse_system_versioning_property( 2093 self, with_: bool = False 2094 ) -> exp.WithSystemVersioningProperty: 2095 self._match(TokenType.EQ) 2096 prop = self.expression( 2097 exp.WithSystemVersioningProperty, 2098 **{ # type: ignore 2099 "on": True, 2100 "with": with_, 2101 }, 2102 ) 2103 2104 if self._match_text_seq("OFF"): 2105 prop.set("on", False) 2106 return prop 2107 2108 self._match(TokenType.ON) 2109 if self._match(TokenType.L_PAREN): 2110 while self._curr and not self._match(TokenType.R_PAREN): 2111 if self._match_text_seq("HISTORY_TABLE", "="): 2112 prop.set("this", self._parse_table_parts()) 2113 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2114 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2115 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2116 prop.set("retention_period", self._parse_retention_period()) 2117 2118 self._match(TokenType.COMMA) 2119 2120 return prop 2121 2122 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2123 self._match(TokenType.EQ) 2124 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2125 prop = self.expression(exp.DataDeletionProperty, on=on) 2126 2127 if self._match(TokenType.L_PAREN): 2128 while self._curr and not self._match(TokenType.R_PAREN): 2129 if self._match_text_seq("FILTER_COLUMN", "="): 2130 prop.set("filter_column", self._parse_column()) 2131 elif self._match_text_seq("RETENTION_PERIOD", "="): 2132 prop.set("retention_period", self._parse_retention_period()) 2133 2134 self._match(TokenType.COMMA) 2135 2136 return prop 2137 2138 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2139 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2140 prop = self._parse_system_versioning_property(with_=True) 2141 self._match_r_paren() 2142 return prop 2143 2144 if self._match(TokenType.L_PAREN, advance=False): 2145 return self._parse_wrapped_properties() 2146 2147 if self._match_text_seq("JOURNAL"): 2148 return self._parse_withjournaltable() 2149 2150 if self._match_texts(self.VIEW_ATTRIBUTES): 2151 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2152 2153 if self._match_text_seq("DATA"): 2154 return self._parse_withdata(no=False) 2155 elif self._match_text_seq("NO", "DATA"): 2156 return self._parse_withdata(no=True) 2157 2158 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2159 return self._parse_serde_properties(with_=True) 2160 2161 if self._match(TokenType.SCHEMA): 2162 return self.expression( 2163 exp.WithSchemaBindingProperty, 2164 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2165 ) 2166 2167 if not self._next: 2168 return None 2169 2170 return self._parse_withisolatedloading() 2171 2172 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2173 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2174 self._match(TokenType.EQ) 2175 2176 user = self._parse_id_var() 2177 self._match(TokenType.PARAMETER) 2178 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2179 2180 if not user or not host: 2181 return None 2182 2183 return exp.DefinerProperty(this=f"{user}@{host}") 2184 2185 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2186 self._match(TokenType.TABLE) 2187 self._match(TokenType.EQ) 2188 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2189 2190 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2191 return self.expression(exp.LogProperty, no=no) 2192 2193 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2194 return self.expression(exp.JournalProperty, **kwargs) 2195 2196 def _parse_checksum(self) -> exp.ChecksumProperty: 2197 self._match(TokenType.EQ) 2198 2199 on = None 2200 if self._match(TokenType.ON): 2201 on = True 2202 elif self._match_text_seq("OFF"): 2203 on = False 2204 2205 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2206 2207 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2208 return self.expression( 2209 exp.Cluster, 2210 expressions=( 2211 self._parse_wrapped_csv(self._parse_ordered) 2212 if wrapped 2213 else self._parse_csv(self._parse_ordered) 2214 ), 2215 ) 2216 2217 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2218 self._match_text_seq("BY") 2219 2220 self._match_l_paren() 2221 expressions = self._parse_csv(self._parse_column) 2222 self._match_r_paren() 2223 2224 if self._match_text_seq("SORTED", "BY"): 2225 self._match_l_paren() 2226 sorted_by = self._parse_csv(self._parse_ordered) 2227 self._match_r_paren() 2228 else: 2229 sorted_by = None 2230 2231 self._match(TokenType.INTO) 2232 buckets = self._parse_number() 2233 self._match_text_seq("BUCKETS") 2234 2235 return self.expression( 2236 exp.ClusteredByProperty, 2237 expressions=expressions, 2238 sorted_by=sorted_by, 2239 buckets=buckets, 2240 ) 2241 2242 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2243 if not self._match_text_seq("GRANTS"): 2244 self._retreat(self._index - 1) 2245 return None 2246 2247 return self.expression(exp.CopyGrantsProperty) 2248 2249 def _parse_freespace(self) -> exp.FreespaceProperty: 2250 self._match(TokenType.EQ) 2251 return self.expression( 2252 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2253 ) 2254 2255 def _parse_mergeblockratio( 2256 self, no: bool = False, default: bool = False 2257 ) -> exp.MergeBlockRatioProperty: 2258 if self._match(TokenType.EQ): 2259 return self.expression( 2260 exp.MergeBlockRatioProperty, 2261 this=self._parse_number(), 2262 percent=self._match(TokenType.PERCENT), 2263 ) 2264 2265 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2266 2267 def _parse_datablocksize( 2268 self, 2269 default: t.Optional[bool] = None, 2270 minimum: t.Optional[bool] = None, 2271 maximum: t.Optional[bool] = None, 2272 ) -> exp.DataBlocksizeProperty: 2273 self._match(TokenType.EQ) 2274 size = self._parse_number() 2275 2276 units = None 2277 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2278 units = self._prev.text 2279 2280 return self.expression( 2281 exp.DataBlocksizeProperty, 2282 size=size, 2283 units=units, 2284 default=default, 2285 minimum=minimum, 2286 maximum=maximum, 2287 ) 2288 2289 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2290 self._match(TokenType.EQ) 2291 always = self._match_text_seq("ALWAYS") 2292 manual = self._match_text_seq("MANUAL") 2293 never = self._match_text_seq("NEVER") 2294 default = self._match_text_seq("DEFAULT") 2295 2296 autotemp = None 2297 if self._match_text_seq("AUTOTEMP"): 2298 autotemp = self._parse_schema() 2299 2300 return self.expression( 2301 exp.BlockCompressionProperty, 2302 always=always, 2303 manual=manual, 2304 never=never, 2305 default=default, 2306 autotemp=autotemp, 2307 ) 2308 2309 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2310 index = self._index 2311 no = self._match_text_seq("NO") 2312 concurrent = self._match_text_seq("CONCURRENT") 2313 2314 if not self._match_text_seq("ISOLATED", "LOADING"): 2315 self._retreat(index) 2316 return None 2317 2318 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2319 return self.expression( 2320 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2321 ) 2322 2323 def _parse_locking(self) -> exp.LockingProperty: 2324 if self._match(TokenType.TABLE): 2325 kind = "TABLE" 2326 elif self._match(TokenType.VIEW): 2327 kind = "VIEW" 2328 elif self._match(TokenType.ROW): 2329 kind = "ROW" 2330 elif self._match_text_seq("DATABASE"): 2331 kind = "DATABASE" 2332 else: 2333 kind = None 2334 2335 if kind in ("DATABASE", "TABLE", "VIEW"): 2336 this = self._parse_table_parts() 2337 else: 2338 this = None 2339 2340 if self._match(TokenType.FOR): 2341 for_or_in = "FOR" 2342 elif self._match(TokenType.IN): 2343 for_or_in = "IN" 2344 else: 2345 for_or_in = None 2346 2347 if self._match_text_seq("ACCESS"): 2348 lock_type = "ACCESS" 2349 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2350 lock_type = "EXCLUSIVE" 2351 elif self._match_text_seq("SHARE"): 2352 lock_type = "SHARE" 2353 elif self._match_text_seq("READ"): 2354 lock_type = "READ" 2355 elif self._match_text_seq("WRITE"): 2356 lock_type = "WRITE" 2357 elif self._match_text_seq("CHECKSUM"): 2358 lock_type = "CHECKSUM" 2359 else: 2360 lock_type = None 2361 2362 override = self._match_text_seq("OVERRIDE") 2363 2364 return self.expression( 2365 exp.LockingProperty, 2366 this=this, 2367 kind=kind, 2368 for_or_in=for_or_in, 2369 lock_type=lock_type, 2370 override=override, 2371 ) 2372 2373 def _parse_partition_by(self) -> t.List[exp.Expression]: 2374 if self._match(TokenType.PARTITION_BY): 2375 return self._parse_csv(self._parse_assignment) 2376 return [] 2377 2378 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2379 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2380 if self._match_text_seq("MINVALUE"): 2381 return exp.var("MINVALUE") 2382 if self._match_text_seq("MAXVALUE"): 2383 return exp.var("MAXVALUE") 2384 return self._parse_bitwise() 2385 2386 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2387 expression = None 2388 from_expressions = None 2389 to_expressions = None 2390 2391 if self._match(TokenType.IN): 2392 this = self._parse_wrapped_csv(self._parse_bitwise) 2393 elif self._match(TokenType.FROM): 2394 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2395 self._match_text_seq("TO") 2396 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2397 elif self._match_text_seq("WITH", "(", "MODULUS"): 2398 this = self._parse_number() 2399 self._match_text_seq(",", "REMAINDER") 2400 expression = self._parse_number() 2401 self._match_r_paren() 2402 else: 2403 self.raise_error("Failed to parse partition bound spec.") 2404 2405 return self.expression( 2406 exp.PartitionBoundSpec, 2407 this=this, 2408 expression=expression, 2409 from_expressions=from_expressions, 2410 to_expressions=to_expressions, 2411 ) 2412 2413 # https://www.postgresql.org/docs/current/sql-createtable.html 2414 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2415 if not self._match_text_seq("OF"): 2416 self._retreat(self._index - 1) 2417 return None 2418 2419 this = self._parse_table(schema=True) 2420 2421 if self._match(TokenType.DEFAULT): 2422 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2423 elif self._match_text_seq("FOR", "VALUES"): 2424 expression = self._parse_partition_bound_spec() 2425 else: 2426 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2427 2428 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2429 2430 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2431 self._match(TokenType.EQ) 2432 return self.expression( 2433 exp.PartitionedByProperty, 2434 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2435 ) 2436 2437 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2438 if self._match_text_seq("AND", "STATISTICS"): 2439 statistics = True 2440 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2441 statistics = False 2442 else: 2443 statistics = None 2444 2445 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2446 2447 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2448 if self._match_text_seq("SQL"): 2449 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2450 return None 2451 2452 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2453 if self._match_text_seq("SQL", "DATA"): 2454 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2455 return None 2456 2457 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2458 if self._match_text_seq("PRIMARY", "INDEX"): 2459 return exp.NoPrimaryIndexProperty() 2460 if self._match_text_seq("SQL"): 2461 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2462 return None 2463 2464 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2465 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2466 return exp.OnCommitProperty() 2467 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2468 return exp.OnCommitProperty(delete=True) 2469 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2470 2471 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2472 if self._match_text_seq("SQL", "DATA"): 2473 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2474 return None 2475 2476 def _parse_distkey(self) -> exp.DistKeyProperty: 2477 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2478 2479 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2480 table = self._parse_table(schema=True) 2481 2482 options = [] 2483 while self._match_texts(("INCLUDING", "EXCLUDING")): 2484 this = self._prev.text.upper() 2485 2486 id_var = self._parse_id_var() 2487 if not id_var: 2488 return None 2489 2490 options.append( 2491 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2492 ) 2493 2494 return self.expression(exp.LikeProperty, this=table, expressions=options) 2495 2496 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2497 return self.expression( 2498 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2499 ) 2500 2501 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2502 self._match(TokenType.EQ) 2503 return self.expression( 2504 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2505 ) 2506 2507 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2508 self._match_text_seq("WITH", "CONNECTION") 2509 return self.expression( 2510 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2511 ) 2512 2513 def _parse_returns(self) -> exp.ReturnsProperty: 2514 value: t.Optional[exp.Expression] 2515 null = None 2516 is_table = self._match(TokenType.TABLE) 2517 2518 if is_table: 2519 if self._match(TokenType.LT): 2520 value = self.expression( 2521 exp.Schema, 2522 this="TABLE", 2523 expressions=self._parse_csv(self._parse_struct_types), 2524 ) 2525 if not self._match(TokenType.GT): 2526 self.raise_error("Expecting >") 2527 else: 2528 value = self._parse_schema(exp.var("TABLE")) 2529 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2530 null = True 2531 value = None 2532 else: 2533 value = self._parse_types() 2534 2535 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2536 2537 def _parse_describe(self) -> exp.Describe: 2538 kind = self._match_set(self.CREATABLES) and self._prev.text 2539 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2540 if self._match(TokenType.DOT): 2541 style = None 2542 self._retreat(self._index - 2) 2543 this = self._parse_table(schema=True) 2544 properties = self._parse_properties() 2545 expressions = properties.expressions if properties else None 2546 partition = self._parse_partition() 2547 return self.expression( 2548 exp.Describe, 2549 this=this, 2550 style=style, 2551 kind=kind, 2552 expressions=expressions, 2553 partition=partition, 2554 ) 2555 2556 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2557 kind = self._prev.text.upper() 2558 expressions = [] 2559 2560 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2561 if self._match(TokenType.WHEN): 2562 expression = self._parse_disjunction() 2563 self._match(TokenType.THEN) 2564 else: 2565 expression = None 2566 2567 else_ = self._match(TokenType.ELSE) 2568 2569 if not self._match(TokenType.INTO): 2570 return None 2571 2572 return self.expression( 2573 exp.ConditionalInsert, 2574 this=self.expression( 2575 exp.Insert, 2576 this=self._parse_table(schema=True), 2577 expression=self._parse_derived_table_values(), 2578 ), 2579 expression=expression, 2580 else_=else_, 2581 ) 2582 2583 expression = parse_conditional_insert() 2584 while expression is not None: 2585 expressions.append(expression) 2586 expression = parse_conditional_insert() 2587 2588 return self.expression( 2589 exp.MultitableInserts, 2590 kind=kind, 2591 comments=comments, 2592 expressions=expressions, 2593 source=self._parse_table(), 2594 ) 2595 2596 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2597 comments = ensure_list(self._prev_comments) 2598 hint = self._parse_hint() 2599 overwrite = self._match(TokenType.OVERWRITE) 2600 ignore = self._match(TokenType.IGNORE) 2601 local = self._match_text_seq("LOCAL") 2602 alternative = None 2603 is_function = None 2604 2605 if self._match_text_seq("DIRECTORY"): 2606 this: t.Optional[exp.Expression] = self.expression( 2607 exp.Directory, 2608 this=self._parse_var_or_string(), 2609 local=local, 2610 row_format=self._parse_row_format(match_row=True), 2611 ) 2612 else: 2613 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2614 comments += ensure_list(self._prev_comments) 2615 return self._parse_multitable_inserts(comments) 2616 2617 if self._match(TokenType.OR): 2618 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2619 2620 self._match(TokenType.INTO) 2621 comments += ensure_list(self._prev_comments) 2622 self._match(TokenType.TABLE) 2623 is_function = self._match(TokenType.FUNCTION) 2624 2625 this = ( 2626 self._parse_table(schema=True, parse_partition=True) 2627 if not is_function 2628 else self._parse_function() 2629 ) 2630 2631 returning = self._parse_returning() 2632 2633 return self.expression( 2634 exp.Insert, 2635 comments=comments, 2636 hint=hint, 2637 is_function=is_function, 2638 this=this, 2639 stored=self._match_text_seq("STORED") and self._parse_stored(), 2640 by_name=self._match_text_seq("BY", "NAME"), 2641 exists=self._parse_exists(), 2642 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2643 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2644 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2645 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2646 conflict=self._parse_on_conflict(), 2647 returning=returning or self._parse_returning(), 2648 overwrite=overwrite, 2649 alternative=alternative, 2650 ignore=ignore, 2651 source=self._match(TokenType.TABLE) and self._parse_table(), 2652 ) 2653 2654 def _parse_kill(self) -> exp.Kill: 2655 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2656 2657 return self.expression( 2658 exp.Kill, 2659 this=self._parse_primary(), 2660 kind=kind, 2661 ) 2662 2663 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2664 conflict = self._match_text_seq("ON", "CONFLICT") 2665 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2666 2667 if not conflict and not duplicate: 2668 return None 2669 2670 conflict_keys = None 2671 constraint = None 2672 2673 if conflict: 2674 if self._match_text_seq("ON", "CONSTRAINT"): 2675 constraint = self._parse_id_var() 2676 elif self._match(TokenType.L_PAREN): 2677 conflict_keys = self._parse_csv(self._parse_id_var) 2678 self._match_r_paren() 2679 2680 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2681 if self._prev.token_type == TokenType.UPDATE: 2682 self._match(TokenType.SET) 2683 expressions = self._parse_csv(self._parse_equality) 2684 else: 2685 expressions = None 2686 2687 return self.expression( 2688 exp.OnConflict, 2689 duplicate=duplicate, 2690 expressions=expressions, 2691 action=action, 2692 conflict_keys=conflict_keys, 2693 constraint=constraint, 2694 ) 2695 2696 def _parse_returning(self) -> t.Optional[exp.Returning]: 2697 if not self._match(TokenType.RETURNING): 2698 return None 2699 return self.expression( 2700 exp.Returning, 2701 expressions=self._parse_csv(self._parse_expression), 2702 into=self._match(TokenType.INTO) and self._parse_table_part(), 2703 ) 2704 2705 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2706 if not self._match(TokenType.FORMAT): 2707 return None 2708 return self._parse_row_format() 2709 2710 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2711 index = self._index 2712 with_ = with_ or self._match_text_seq("WITH") 2713 2714 if not self._match(TokenType.SERDE_PROPERTIES): 2715 self._retreat(index) 2716 return None 2717 return self.expression( 2718 exp.SerdeProperties, 2719 **{ # type: ignore 2720 "expressions": self._parse_wrapped_properties(), 2721 "with": with_, 2722 }, 2723 ) 2724 2725 def _parse_row_format( 2726 self, match_row: bool = False 2727 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2728 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2729 return None 2730 2731 if self._match_text_seq("SERDE"): 2732 this = self._parse_string() 2733 2734 serde_properties = self._parse_serde_properties() 2735 2736 return self.expression( 2737 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2738 ) 2739 2740 self._match_text_seq("DELIMITED") 2741 2742 kwargs = {} 2743 2744 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2745 kwargs["fields"] = self._parse_string() 2746 if self._match_text_seq("ESCAPED", "BY"): 2747 kwargs["escaped"] = self._parse_string() 2748 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2749 kwargs["collection_items"] = self._parse_string() 2750 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2751 kwargs["map_keys"] = self._parse_string() 2752 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2753 kwargs["lines"] = self._parse_string() 2754 if self._match_text_seq("NULL", "DEFINED", "AS"): 2755 kwargs["null"] = self._parse_string() 2756 2757 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2758 2759 def _parse_load(self) -> exp.LoadData | exp.Command: 2760 if self._match_text_seq("DATA"): 2761 local = self._match_text_seq("LOCAL") 2762 self._match_text_seq("INPATH") 2763 inpath = self._parse_string() 2764 overwrite = self._match(TokenType.OVERWRITE) 2765 self._match_pair(TokenType.INTO, TokenType.TABLE) 2766 2767 return self.expression( 2768 exp.LoadData, 2769 this=self._parse_table(schema=True), 2770 local=local, 2771 overwrite=overwrite, 2772 inpath=inpath, 2773 partition=self._parse_partition(), 2774 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2775 serde=self._match_text_seq("SERDE") and self._parse_string(), 2776 ) 2777 return self._parse_as_command(self._prev) 2778 2779 def _parse_delete(self) -> exp.Delete: 2780 # This handles MySQL's "Multiple-Table Syntax" 2781 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2782 tables = None 2783 comments = self._prev_comments 2784 if not self._match(TokenType.FROM, advance=False): 2785 tables = self._parse_csv(self._parse_table) or None 2786 2787 returning = self._parse_returning() 2788 2789 return self.expression( 2790 exp.Delete, 2791 comments=comments, 2792 tables=tables, 2793 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2794 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2795 where=self._parse_where(), 2796 returning=returning or self._parse_returning(), 2797 limit=self._parse_limit(), 2798 ) 2799 2800 def _parse_update(self) -> exp.Update: 2801 comments = self._prev_comments 2802 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2803 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2804 returning = self._parse_returning() 2805 return self.expression( 2806 exp.Update, 2807 comments=comments, 2808 **{ # type: ignore 2809 "this": this, 2810 "expressions": expressions, 2811 "from": self._parse_from(joins=True), 2812 "where": self._parse_where(), 2813 "returning": returning or self._parse_returning(), 2814 "order": self._parse_order(), 2815 "limit": self._parse_limit(), 2816 }, 2817 ) 2818 2819 def _parse_uncache(self) -> exp.Uncache: 2820 if not self._match(TokenType.TABLE): 2821 self.raise_error("Expecting TABLE after UNCACHE") 2822 2823 return self.expression( 2824 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2825 ) 2826 2827 def _parse_cache(self) -> exp.Cache: 2828 lazy = self._match_text_seq("LAZY") 2829 self._match(TokenType.TABLE) 2830 table = self._parse_table(schema=True) 2831 2832 options = [] 2833 if self._match_text_seq("OPTIONS"): 2834 self._match_l_paren() 2835 k = self._parse_string() 2836 self._match(TokenType.EQ) 2837 v = self._parse_string() 2838 options = [k, v] 2839 self._match_r_paren() 2840 2841 self._match(TokenType.ALIAS) 2842 return self.expression( 2843 exp.Cache, 2844 this=table, 2845 lazy=lazy, 2846 options=options, 2847 expression=self._parse_select(nested=True), 2848 ) 2849 2850 def _parse_partition(self) -> t.Optional[exp.Partition]: 2851 if not self._match(TokenType.PARTITION): 2852 return None 2853 2854 return self.expression( 2855 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2856 ) 2857 2858 def _parse_value(self) -> t.Optional[exp.Tuple]: 2859 if self._match(TokenType.L_PAREN): 2860 expressions = self._parse_csv(self._parse_expression) 2861 self._match_r_paren() 2862 return self.expression(exp.Tuple, expressions=expressions) 2863 2864 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2865 expression = self._parse_expression() 2866 if expression: 2867 return self.expression(exp.Tuple, expressions=[expression]) 2868 return None 2869 2870 def _parse_projections(self) -> t.List[exp.Expression]: 2871 return self._parse_expressions() 2872 2873 def _parse_select( 2874 self, 2875 nested: bool = False, 2876 table: bool = False, 2877 parse_subquery_alias: bool = True, 2878 parse_set_operation: bool = True, 2879 ) -> t.Optional[exp.Expression]: 2880 cte = self._parse_with() 2881 2882 if cte: 2883 this = self._parse_statement() 2884 2885 if not this: 2886 self.raise_error("Failed to parse any statement following CTE") 2887 return cte 2888 2889 if "with" in this.arg_types: 2890 this.set("with", cte) 2891 else: 2892 self.raise_error(f"{this.key} does not support CTE") 2893 this = cte 2894 2895 return this 2896 2897 # duckdb supports leading with FROM x 2898 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2899 2900 if self._match(TokenType.SELECT): 2901 comments = self._prev_comments 2902 2903 hint = self._parse_hint() 2904 2905 if self._next and not self._next.token_type == TokenType.DOT: 2906 all_ = self._match(TokenType.ALL) 2907 distinct = self._match_set(self.DISTINCT_TOKENS) 2908 else: 2909 all_, distinct = None, None 2910 2911 kind = ( 2912 self._match(TokenType.ALIAS) 2913 and self._match_texts(("STRUCT", "VALUE")) 2914 and self._prev.text.upper() 2915 ) 2916 2917 if distinct: 2918 distinct = self.expression( 2919 exp.Distinct, 2920 on=self._parse_value() if self._match(TokenType.ON) else None, 2921 ) 2922 2923 if all_ and distinct: 2924 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2925 2926 limit = self._parse_limit(top=True) 2927 projections = self._parse_projections() 2928 2929 this = self.expression( 2930 exp.Select, 2931 kind=kind, 2932 hint=hint, 2933 distinct=distinct, 2934 expressions=projections, 2935 limit=limit, 2936 ) 2937 this.comments = comments 2938 2939 into = self._parse_into() 2940 if into: 2941 this.set("into", into) 2942 2943 if not from_: 2944 from_ = self._parse_from() 2945 2946 if from_: 2947 this.set("from", from_) 2948 2949 this = self._parse_query_modifiers(this) 2950 elif (table or nested) and self._match(TokenType.L_PAREN): 2951 if self._match(TokenType.PIVOT): 2952 this = self._parse_simplified_pivot() 2953 elif self._match(TokenType.FROM): 2954 this = exp.select("*").from_( 2955 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2956 ) 2957 else: 2958 this = ( 2959 self._parse_table() 2960 if table 2961 else self._parse_select(nested=True, parse_set_operation=False) 2962 ) 2963 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2964 2965 self._match_r_paren() 2966 2967 # We return early here so that the UNION isn't attached to the subquery by the 2968 # following call to _parse_set_operations, but instead becomes the parent node 2969 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2970 elif self._match(TokenType.VALUES, advance=False): 2971 this = self._parse_derived_table_values() 2972 elif from_: 2973 this = exp.select("*").from_(from_.this, copy=False) 2974 elif self._match(TokenType.SUMMARIZE): 2975 table = self._match(TokenType.TABLE) 2976 this = self._parse_select() or self._parse_string() or self._parse_table() 2977 return self.expression(exp.Summarize, this=this, table=table) 2978 elif self._match(TokenType.DESCRIBE): 2979 this = self._parse_describe() 2980 elif self._match_text_seq("STREAM"): 2981 this = self.expression(exp.Stream, this=self._parse_function()) 2982 else: 2983 this = None 2984 2985 return self._parse_set_operations(this) if parse_set_operation else this 2986 2987 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2988 if not skip_with_token and not self._match(TokenType.WITH): 2989 return None 2990 2991 comments = self._prev_comments 2992 recursive = self._match(TokenType.RECURSIVE) 2993 2994 expressions = [] 2995 while True: 2996 expressions.append(self._parse_cte()) 2997 2998 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2999 break 3000 else: 3001 self._match(TokenType.WITH) 3002 3003 return self.expression( 3004 exp.With, comments=comments, expressions=expressions, recursive=recursive 3005 ) 3006 3007 def _parse_cte(self) -> exp.CTE: 3008 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3009 if not alias or not alias.this: 3010 self.raise_error("Expected CTE to have alias") 3011 3012 self._match(TokenType.ALIAS) 3013 comments = self._prev_comments 3014 3015 if self._match_text_seq("NOT", "MATERIALIZED"): 3016 materialized = False 3017 elif self._match_text_seq("MATERIALIZED"): 3018 materialized = True 3019 else: 3020 materialized = None 3021 3022 return self.expression( 3023 exp.CTE, 3024 this=self._parse_wrapped(self._parse_statement), 3025 alias=alias, 3026 materialized=materialized, 3027 comments=comments, 3028 ) 3029 3030 def _parse_table_alias( 3031 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3032 ) -> t.Optional[exp.TableAlias]: 3033 any_token = self._match(TokenType.ALIAS) 3034 alias = ( 3035 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3036 or self._parse_string_as_identifier() 3037 ) 3038 3039 index = self._index 3040 if self._match(TokenType.L_PAREN): 3041 columns = self._parse_csv(self._parse_function_parameter) 3042 self._match_r_paren() if columns else self._retreat(index) 3043 else: 3044 columns = None 3045 3046 if not alias and not columns: 3047 return None 3048 3049 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3050 3051 # We bubble up comments from the Identifier to the TableAlias 3052 if isinstance(alias, exp.Identifier): 3053 table_alias.add_comments(alias.pop_comments()) 3054 3055 return table_alias 3056 3057 def _parse_subquery( 3058 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3059 ) -> t.Optional[exp.Subquery]: 3060 if not this: 3061 return None 3062 3063 return self.expression( 3064 exp.Subquery, 3065 this=this, 3066 pivots=self._parse_pivots(), 3067 alias=self._parse_table_alias() if parse_alias else None, 3068 sample=self._parse_table_sample(), 3069 ) 3070 3071 def _implicit_unnests_to_explicit(self, this: E) -> E: 3072 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3073 3074 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3075 for i, join in enumerate(this.args.get("joins") or []): 3076 table = join.this 3077 normalized_table = table.copy() 3078 normalized_table.meta["maybe_column"] = True 3079 normalized_table = _norm(normalized_table, dialect=self.dialect) 3080 3081 if isinstance(table, exp.Table) and not join.args.get("on"): 3082 if normalized_table.parts[0].name in refs: 3083 table_as_column = table.to_column() 3084 unnest = exp.Unnest(expressions=[table_as_column]) 3085 3086 # Table.to_column creates a parent Alias node that we want to convert to 3087 # a TableAlias and attach to the Unnest, so it matches the parser's output 3088 if isinstance(table.args.get("alias"), exp.TableAlias): 3089 table_as_column.replace(table_as_column.this) 3090 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3091 3092 table.replace(unnest) 3093 3094 refs.add(normalized_table.alias_or_name) 3095 3096 return this 3097 3098 def _parse_query_modifiers( 3099 self, this: t.Optional[exp.Expression] 3100 ) -> t.Optional[exp.Expression]: 3101 if isinstance(this, (exp.Query, exp.Table)): 3102 for join in self._parse_joins(): 3103 this.append("joins", join) 3104 for lateral in iter(self._parse_lateral, None): 3105 this.append("laterals", lateral) 3106 3107 while True: 3108 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3109 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3110 key, expression = parser(self) 3111 3112 if expression: 3113 this.set(key, expression) 3114 if key == "limit": 3115 offset = expression.args.pop("offset", None) 3116 3117 if offset: 3118 offset = exp.Offset(expression=offset) 3119 this.set("offset", offset) 3120 3121 limit_by_expressions = expression.expressions 3122 expression.set("expressions", None) 3123 offset.set("expressions", limit_by_expressions) 3124 continue 3125 break 3126 3127 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3128 this = self._implicit_unnests_to_explicit(this) 3129 3130 return this 3131 3132 def _parse_hint(self) -> t.Optional[exp.Hint]: 3133 if self._match(TokenType.HINT): 3134 hints = [] 3135 for hint in iter( 3136 lambda: self._parse_csv( 3137 lambda: self._parse_function() or self._parse_var(upper=True) 3138 ), 3139 [], 3140 ): 3141 hints.extend(hint) 3142 3143 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3144 self.raise_error("Expected */ after HINT") 3145 3146 return self.expression(exp.Hint, expressions=hints) 3147 3148 return None 3149 3150 def _parse_into(self) -> t.Optional[exp.Into]: 3151 if not self._match(TokenType.INTO): 3152 return None 3153 3154 temp = self._match(TokenType.TEMPORARY) 3155 unlogged = self._match_text_seq("UNLOGGED") 3156 self._match(TokenType.TABLE) 3157 3158 return self.expression( 3159 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3160 ) 3161 3162 def _parse_from( 3163 self, joins: bool = False, skip_from_token: bool = False 3164 ) -> t.Optional[exp.From]: 3165 if not skip_from_token and not self._match(TokenType.FROM): 3166 return None 3167 3168 return self.expression( 3169 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3170 ) 3171 3172 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3173 return self.expression( 3174 exp.MatchRecognizeMeasure, 3175 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3176 this=self._parse_expression(), 3177 ) 3178 3179 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3180 if not self._match(TokenType.MATCH_RECOGNIZE): 3181 return None 3182 3183 self._match_l_paren() 3184 3185 partition = self._parse_partition_by() 3186 order = self._parse_order() 3187 3188 measures = ( 3189 self._parse_csv(self._parse_match_recognize_measure) 3190 if self._match_text_seq("MEASURES") 3191 else None 3192 ) 3193 3194 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3195 rows = exp.var("ONE ROW PER MATCH") 3196 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3197 text = "ALL ROWS PER MATCH" 3198 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3199 text += " SHOW EMPTY MATCHES" 3200 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3201 text += " OMIT EMPTY MATCHES" 3202 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3203 text += " WITH UNMATCHED ROWS" 3204 rows = exp.var(text) 3205 else: 3206 rows = None 3207 3208 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3209 text = "AFTER MATCH SKIP" 3210 if self._match_text_seq("PAST", "LAST", "ROW"): 3211 text += " PAST LAST ROW" 3212 elif self._match_text_seq("TO", "NEXT", "ROW"): 3213 text += " TO NEXT ROW" 3214 elif self._match_text_seq("TO", "FIRST"): 3215 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3216 elif self._match_text_seq("TO", "LAST"): 3217 text += f" TO LAST {self._advance_any().text}" # type: ignore 3218 after = exp.var(text) 3219 else: 3220 after = None 3221 3222 if self._match_text_seq("PATTERN"): 3223 self._match_l_paren() 3224 3225 if not self._curr: 3226 self.raise_error("Expecting )", self._curr) 3227 3228 paren = 1 3229 start = self._curr 3230 3231 while self._curr and paren > 0: 3232 if self._curr.token_type == TokenType.L_PAREN: 3233 paren += 1 3234 if self._curr.token_type == TokenType.R_PAREN: 3235 paren -= 1 3236 3237 end = self._prev 3238 self._advance() 3239 3240 if paren > 0: 3241 self.raise_error("Expecting )", self._curr) 3242 3243 pattern = exp.var(self._find_sql(start, end)) 3244 else: 3245 pattern = None 3246 3247 define = ( 3248 self._parse_csv(self._parse_name_as_expression) 3249 if self._match_text_seq("DEFINE") 3250 else None 3251 ) 3252 3253 self._match_r_paren() 3254 3255 return self.expression( 3256 exp.MatchRecognize, 3257 partition_by=partition, 3258 order=order, 3259 measures=measures, 3260 rows=rows, 3261 after=after, 3262 pattern=pattern, 3263 define=define, 3264 alias=self._parse_table_alias(), 3265 ) 3266 3267 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3268 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3269 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3270 cross_apply = False 3271 3272 if cross_apply is not None: 3273 this = self._parse_select(table=True) 3274 view = None 3275 outer = None 3276 elif self._match(TokenType.LATERAL): 3277 this = self._parse_select(table=True) 3278 view = self._match(TokenType.VIEW) 3279 outer = self._match(TokenType.OUTER) 3280 else: 3281 return None 3282 3283 if not this: 3284 this = ( 3285 self._parse_unnest() 3286 or self._parse_function() 3287 or self._parse_id_var(any_token=False) 3288 ) 3289 3290 while self._match(TokenType.DOT): 3291 this = exp.Dot( 3292 this=this, 3293 expression=self._parse_function() or self._parse_id_var(any_token=False), 3294 ) 3295 3296 if view: 3297 table = self._parse_id_var(any_token=False) 3298 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3299 table_alias: t.Optional[exp.TableAlias] = self.expression( 3300 exp.TableAlias, this=table, columns=columns 3301 ) 3302 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3303 # We move the alias from the lateral's child node to the lateral itself 3304 table_alias = this.args["alias"].pop() 3305 else: 3306 table_alias = self._parse_table_alias() 3307 3308 return self.expression( 3309 exp.Lateral, 3310 this=this, 3311 view=view, 3312 outer=outer, 3313 alias=table_alias, 3314 cross_apply=cross_apply, 3315 ) 3316 3317 def _parse_join_parts( 3318 self, 3319 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3320 return ( 3321 self._match_set(self.JOIN_METHODS) and self._prev, 3322 self._match_set(self.JOIN_SIDES) and self._prev, 3323 self._match_set(self.JOIN_KINDS) and self._prev, 3324 ) 3325 3326 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3327 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3328 this = self._parse_column() 3329 if isinstance(this, exp.Column): 3330 return this.this 3331 return this 3332 3333 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3334 3335 def _parse_join( 3336 self, skip_join_token: bool = False, parse_bracket: bool = False 3337 ) -> t.Optional[exp.Join]: 3338 if self._match(TokenType.COMMA): 3339 return self.expression(exp.Join, this=self._parse_table()) 3340 3341 index = self._index 3342 method, side, kind = self._parse_join_parts() 3343 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3344 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3345 3346 if not skip_join_token and not join: 3347 self._retreat(index) 3348 kind = None 3349 method = None 3350 side = None 3351 3352 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3353 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3354 3355 if not skip_join_token and not join and not outer_apply and not cross_apply: 3356 return None 3357 3358 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3359 3360 if method: 3361 kwargs["method"] = method.text 3362 if side: 3363 kwargs["side"] = side.text 3364 if kind: 3365 kwargs["kind"] = kind.text 3366 if hint: 3367 kwargs["hint"] = hint 3368 3369 if self._match(TokenType.MATCH_CONDITION): 3370 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3371 3372 if self._match(TokenType.ON): 3373 kwargs["on"] = self._parse_assignment() 3374 elif self._match(TokenType.USING): 3375 kwargs["using"] = self._parse_using_identifiers() 3376 elif ( 3377 not (outer_apply or cross_apply) 3378 and not isinstance(kwargs["this"], exp.Unnest) 3379 and not (kind and kind.token_type == TokenType.CROSS) 3380 ): 3381 index = self._index 3382 joins: t.Optional[list] = list(self._parse_joins()) 3383 3384 if joins and self._match(TokenType.ON): 3385 kwargs["on"] = self._parse_assignment() 3386 elif joins and self._match(TokenType.USING): 3387 kwargs["using"] = self._parse_using_identifiers() 3388 else: 3389 joins = None 3390 self._retreat(index) 3391 3392 kwargs["this"].set("joins", joins if joins else None) 3393 3394 comments = [c for token in (method, side, kind) if token for c in token.comments] 3395 return self.expression(exp.Join, comments=comments, **kwargs) 3396 3397 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3398 this = self._parse_assignment() 3399 3400 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3401 return this 3402 3403 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3404 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3405 3406 return this 3407 3408 def _parse_index_params(self) -> exp.IndexParameters: 3409 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3410 3411 if self._match(TokenType.L_PAREN, advance=False): 3412 columns = self._parse_wrapped_csv(self._parse_with_operator) 3413 else: 3414 columns = None 3415 3416 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3417 partition_by = self._parse_partition_by() 3418 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3419 tablespace = ( 3420 self._parse_var(any_token=True) 3421 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3422 else None 3423 ) 3424 where = self._parse_where() 3425 3426 on = self._parse_field() if self._match(TokenType.ON) else None 3427 3428 return self.expression( 3429 exp.IndexParameters, 3430 using=using, 3431 columns=columns, 3432 include=include, 3433 partition_by=partition_by, 3434 where=where, 3435 with_storage=with_storage, 3436 tablespace=tablespace, 3437 on=on, 3438 ) 3439 3440 def _parse_index( 3441 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3442 ) -> t.Optional[exp.Index]: 3443 if index or anonymous: 3444 unique = None 3445 primary = None 3446 amp = None 3447 3448 self._match(TokenType.ON) 3449 self._match(TokenType.TABLE) # hive 3450 table = self._parse_table_parts(schema=True) 3451 else: 3452 unique = self._match(TokenType.UNIQUE) 3453 primary = self._match_text_seq("PRIMARY") 3454 amp = self._match_text_seq("AMP") 3455 3456 if not self._match(TokenType.INDEX): 3457 return None 3458 3459 index = self._parse_id_var() 3460 table = None 3461 3462 params = self._parse_index_params() 3463 3464 return self.expression( 3465 exp.Index, 3466 this=index, 3467 table=table, 3468 unique=unique, 3469 primary=primary, 3470 amp=amp, 3471 params=params, 3472 ) 3473 3474 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3475 hints: t.List[exp.Expression] = [] 3476 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3477 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3478 hints.append( 3479 self.expression( 3480 exp.WithTableHint, 3481 expressions=self._parse_csv( 3482 lambda: self._parse_function() or self._parse_var(any_token=True) 3483 ), 3484 ) 3485 ) 3486 self._match_r_paren() 3487 else: 3488 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3489 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3490 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3491 3492 self._match_set((TokenType.INDEX, TokenType.KEY)) 3493 if self._match(TokenType.FOR): 3494 hint.set("target", self._advance_any() and self._prev.text.upper()) 3495 3496 hint.set("expressions", self._parse_wrapped_id_vars()) 3497 hints.append(hint) 3498 3499 return hints or None 3500 3501 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3502 return ( 3503 (not schema and self._parse_function(optional_parens=False)) 3504 or self._parse_id_var(any_token=False) 3505 or self._parse_string_as_identifier() 3506 or self._parse_placeholder() 3507 ) 3508 3509 def _parse_table_parts( 3510 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3511 ) -> exp.Table: 3512 catalog = None 3513 db = None 3514 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3515 3516 while self._match(TokenType.DOT): 3517 if catalog: 3518 # This allows nesting the table in arbitrarily many dot expressions if needed 3519 table = self.expression( 3520 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3521 ) 3522 else: 3523 catalog = db 3524 db = table 3525 # "" used for tsql FROM a..b case 3526 table = self._parse_table_part(schema=schema) or "" 3527 3528 if ( 3529 wildcard 3530 and self._is_connected() 3531 and (isinstance(table, exp.Identifier) or not table) 3532 and self._match(TokenType.STAR) 3533 ): 3534 if isinstance(table, exp.Identifier): 3535 table.args["this"] += "*" 3536 else: 3537 table = exp.Identifier(this="*") 3538 3539 # We bubble up comments from the Identifier to the Table 3540 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3541 3542 if is_db_reference: 3543 catalog = db 3544 db = table 3545 table = None 3546 3547 if not table and not is_db_reference: 3548 self.raise_error(f"Expected table name but got {self._curr}") 3549 if not db and is_db_reference: 3550 self.raise_error(f"Expected database name but got {self._curr}") 3551 3552 table = self.expression( 3553 exp.Table, 3554 comments=comments, 3555 this=table, 3556 db=db, 3557 catalog=catalog, 3558 ) 3559 3560 changes = self._parse_changes() 3561 if changes: 3562 table.set("changes", changes) 3563 3564 at_before = self._parse_historical_data() 3565 if at_before: 3566 table.set("when", at_before) 3567 3568 pivots = self._parse_pivots() 3569 if pivots: 3570 table.set("pivots", pivots) 3571 3572 return table 3573 3574 def _parse_table( 3575 self, 3576 schema: bool = False, 3577 joins: bool = False, 3578 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3579 parse_bracket: bool = False, 3580 is_db_reference: bool = False, 3581 parse_partition: bool = False, 3582 ) -> t.Optional[exp.Expression]: 3583 lateral = self._parse_lateral() 3584 if lateral: 3585 return lateral 3586 3587 unnest = self._parse_unnest() 3588 if unnest: 3589 return unnest 3590 3591 values = self._parse_derived_table_values() 3592 if values: 3593 return values 3594 3595 subquery = self._parse_select(table=True) 3596 if subquery: 3597 if not subquery.args.get("pivots"): 3598 subquery.set("pivots", self._parse_pivots()) 3599 return subquery 3600 3601 bracket = parse_bracket and self._parse_bracket(None) 3602 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3603 3604 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3605 self._parse_table 3606 ) 3607 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3608 3609 only = self._match(TokenType.ONLY) 3610 3611 this = t.cast( 3612 exp.Expression, 3613 bracket 3614 or rows_from 3615 or self._parse_bracket( 3616 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3617 ), 3618 ) 3619 3620 if only: 3621 this.set("only", only) 3622 3623 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3624 self._match_text_seq("*") 3625 3626 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3627 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3628 this.set("partition", self._parse_partition()) 3629 3630 if schema: 3631 return self._parse_schema(this=this) 3632 3633 version = self._parse_version() 3634 3635 if version: 3636 this.set("version", version) 3637 3638 if self.dialect.ALIAS_POST_TABLESAMPLE: 3639 this.set("sample", self._parse_table_sample()) 3640 3641 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3642 if alias: 3643 this.set("alias", alias) 3644 3645 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3646 return self.expression( 3647 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3648 ) 3649 3650 this.set("hints", self._parse_table_hints()) 3651 3652 if not this.args.get("pivots"): 3653 this.set("pivots", self._parse_pivots()) 3654 3655 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3656 this.set("sample", self._parse_table_sample()) 3657 3658 if joins: 3659 for join in self._parse_joins(): 3660 this.append("joins", join) 3661 3662 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3663 this.set("ordinality", True) 3664 this.set("alias", self._parse_table_alias()) 3665 3666 return this 3667 3668 def _parse_version(self) -> t.Optional[exp.Version]: 3669 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3670 this = "TIMESTAMP" 3671 elif self._match(TokenType.VERSION_SNAPSHOT): 3672 this = "VERSION" 3673 else: 3674 return None 3675 3676 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3677 kind = self._prev.text.upper() 3678 start = self._parse_bitwise() 3679 self._match_texts(("TO", "AND")) 3680 end = self._parse_bitwise() 3681 expression: t.Optional[exp.Expression] = self.expression( 3682 exp.Tuple, expressions=[start, end] 3683 ) 3684 elif self._match_text_seq("CONTAINED", "IN"): 3685 kind = "CONTAINED IN" 3686 expression = self.expression( 3687 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3688 ) 3689 elif self._match(TokenType.ALL): 3690 kind = "ALL" 3691 expression = None 3692 else: 3693 self._match_text_seq("AS", "OF") 3694 kind = "AS OF" 3695 expression = self._parse_type() 3696 3697 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3698 3699 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3700 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3701 index = self._index 3702 historical_data = None 3703 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3704 this = self._prev.text.upper() 3705 kind = ( 3706 self._match(TokenType.L_PAREN) 3707 and self._match_texts(self.HISTORICAL_DATA_KIND) 3708 and self._prev.text.upper() 3709 ) 3710 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3711 3712 if expression: 3713 self._match_r_paren() 3714 historical_data = self.expression( 3715 exp.HistoricalData, this=this, kind=kind, expression=expression 3716 ) 3717 else: 3718 self._retreat(index) 3719 3720 return historical_data 3721 3722 def _parse_changes(self) -> t.Optional[exp.Changes]: 3723 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3724 return None 3725 3726 information = self._parse_var(any_token=True) 3727 self._match_r_paren() 3728 3729 return self.expression( 3730 exp.Changes, 3731 information=information, 3732 at_before=self._parse_historical_data(), 3733 end=self._parse_historical_data(), 3734 ) 3735 3736 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3737 if not self._match(TokenType.UNNEST): 3738 return None 3739 3740 expressions = self._parse_wrapped_csv(self._parse_equality) 3741 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3742 3743 alias = self._parse_table_alias() if with_alias else None 3744 3745 if alias: 3746 if self.dialect.UNNEST_COLUMN_ONLY: 3747 if alias.args.get("columns"): 3748 self.raise_error("Unexpected extra column alias in unnest.") 3749 3750 alias.set("columns", [alias.this]) 3751 alias.set("this", None) 3752 3753 columns = alias.args.get("columns") or [] 3754 if offset and len(expressions) < len(columns): 3755 offset = columns.pop() 3756 3757 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3758 self._match(TokenType.ALIAS) 3759 offset = self._parse_id_var( 3760 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3761 ) or exp.to_identifier("offset") 3762 3763 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3764 3765 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3766 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3767 if not is_derived and not ( 3768 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3769 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3770 ): 3771 return None 3772 3773 expressions = self._parse_csv(self._parse_value) 3774 alias = self._parse_table_alias() 3775 3776 if is_derived: 3777 self._match_r_paren() 3778 3779 return self.expression( 3780 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3781 ) 3782 3783 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3784 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3785 as_modifier and self._match_text_seq("USING", "SAMPLE") 3786 ): 3787 return None 3788 3789 bucket_numerator = None 3790 bucket_denominator = None 3791 bucket_field = None 3792 percent = None 3793 size = None 3794 seed = None 3795 3796 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3797 matched_l_paren = self._match(TokenType.L_PAREN) 3798 3799 if self.TABLESAMPLE_CSV: 3800 num = None 3801 expressions = self._parse_csv(self._parse_primary) 3802 else: 3803 expressions = None 3804 num = ( 3805 self._parse_factor() 3806 if self._match(TokenType.NUMBER, advance=False) 3807 else self._parse_primary() or self._parse_placeholder() 3808 ) 3809 3810 if self._match_text_seq("BUCKET"): 3811 bucket_numerator = self._parse_number() 3812 self._match_text_seq("OUT", "OF") 3813 bucket_denominator = bucket_denominator = self._parse_number() 3814 self._match(TokenType.ON) 3815 bucket_field = self._parse_field() 3816 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3817 percent = num 3818 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3819 size = num 3820 else: 3821 percent = num 3822 3823 if matched_l_paren: 3824 self._match_r_paren() 3825 3826 if self._match(TokenType.L_PAREN): 3827 method = self._parse_var(upper=True) 3828 seed = self._match(TokenType.COMMA) and self._parse_number() 3829 self._match_r_paren() 3830 elif self._match_texts(("SEED", "REPEATABLE")): 3831 seed = self._parse_wrapped(self._parse_number) 3832 3833 if not method and self.DEFAULT_SAMPLING_METHOD: 3834 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3835 3836 return self.expression( 3837 exp.TableSample, 3838 expressions=expressions, 3839 method=method, 3840 bucket_numerator=bucket_numerator, 3841 bucket_denominator=bucket_denominator, 3842 bucket_field=bucket_field, 3843 percent=percent, 3844 size=size, 3845 seed=seed, 3846 ) 3847 3848 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3849 return list(iter(self._parse_pivot, None)) or None 3850 3851 def _parse_joins(self) -> t.Iterator[exp.Join]: 3852 return iter(self._parse_join, None) 3853 3854 # https://duckdb.org/docs/sql/statements/pivot 3855 def _parse_simplified_pivot(self) -> exp.Pivot: 3856 def _parse_on() -> t.Optional[exp.Expression]: 3857 this = self._parse_bitwise() 3858 return self._parse_in(this) if self._match(TokenType.IN) else this 3859 3860 this = self._parse_table() 3861 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3862 using = self._match(TokenType.USING) and self._parse_csv( 3863 lambda: self._parse_alias(self._parse_function()) 3864 ) 3865 group = self._parse_group() 3866 return self.expression( 3867 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3868 ) 3869 3870 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3871 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3872 this = self._parse_select_or_expression() 3873 3874 self._match(TokenType.ALIAS) 3875 alias = self._parse_bitwise() 3876 if alias: 3877 if isinstance(alias, exp.Column) and not alias.db: 3878 alias = alias.this 3879 return self.expression(exp.PivotAlias, this=this, alias=alias) 3880 3881 return this 3882 3883 value = self._parse_column() 3884 3885 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3886 self.raise_error("Expecting IN (") 3887 3888 if self._match(TokenType.ANY): 3889 expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order()) 3890 else: 3891 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3892 expr = self.expression(exp.In, this=value, expressions=aliased_expressions) 3893 3894 self._match_r_paren() 3895 return expr 3896 3897 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3898 index = self._index 3899 include_nulls = None 3900 3901 if self._match(TokenType.PIVOT): 3902 unpivot = False 3903 elif self._match(TokenType.UNPIVOT): 3904 unpivot = True 3905 3906 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3907 if self._match_text_seq("INCLUDE", "NULLS"): 3908 include_nulls = True 3909 elif self._match_text_seq("EXCLUDE", "NULLS"): 3910 include_nulls = False 3911 else: 3912 return None 3913 3914 expressions = [] 3915 3916 if not self._match(TokenType.L_PAREN): 3917 self._retreat(index) 3918 return None 3919 3920 if unpivot: 3921 expressions = self._parse_csv(self._parse_column) 3922 else: 3923 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3924 3925 if not expressions: 3926 self.raise_error("Failed to parse PIVOT's aggregation list") 3927 3928 if not self._match(TokenType.FOR): 3929 self.raise_error("Expecting FOR") 3930 3931 field = self._parse_pivot_in() 3932 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3933 self._parse_bitwise 3934 ) 3935 3936 self._match_r_paren() 3937 3938 pivot = self.expression( 3939 exp.Pivot, 3940 expressions=expressions, 3941 field=field, 3942 unpivot=unpivot, 3943 include_nulls=include_nulls, 3944 default_on_null=default_on_null, 3945 ) 3946 3947 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3948 pivot.set("alias", self._parse_table_alias()) 3949 3950 if not unpivot: 3951 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3952 3953 columns: t.List[exp.Expression] = [] 3954 for fld in pivot.args["field"].expressions: 3955 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3956 for name in names: 3957 if self.PREFIXED_PIVOT_COLUMNS: 3958 name = f"{name}_{field_name}" if name else field_name 3959 else: 3960 name = f"{field_name}_{name}" if name else field_name 3961 3962 columns.append(exp.to_identifier(name)) 3963 3964 pivot.set("columns", columns) 3965 3966 return pivot 3967 3968 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3969 return [agg.alias for agg in aggregations] 3970 3971 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3972 if not skip_where_token and not self._match(TokenType.PREWHERE): 3973 return None 3974 3975 return self.expression( 3976 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3977 ) 3978 3979 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3980 if not skip_where_token and not self._match(TokenType.WHERE): 3981 return None 3982 3983 return self.expression( 3984 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3985 ) 3986 3987 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3988 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3989 return None 3990 3991 elements: t.Dict[str, t.Any] = defaultdict(list) 3992 3993 if self._match(TokenType.ALL): 3994 elements["all"] = True 3995 elif self._match(TokenType.DISTINCT): 3996 elements["all"] = False 3997 3998 while True: 3999 index = self._index 4000 4001 elements["expressions"].extend( 4002 self._parse_csv( 4003 lambda: None 4004 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4005 else self._parse_assignment() 4006 ) 4007 ) 4008 4009 before_with_index = self._index 4010 with_prefix = self._match(TokenType.WITH) 4011 4012 if self._match(TokenType.ROLLUP): 4013 elements["rollup"].append( 4014 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4015 ) 4016 elif self._match(TokenType.CUBE): 4017 elements["cube"].append( 4018 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4019 ) 4020 elif self._match(TokenType.GROUPING_SETS): 4021 elements["grouping_sets"].append( 4022 self.expression( 4023 exp.GroupingSets, 4024 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4025 ) 4026 ) 4027 elif self._match_text_seq("TOTALS"): 4028 elements["totals"] = True # type: ignore 4029 4030 if before_with_index <= self._index <= before_with_index + 1: 4031 self._retreat(before_with_index) 4032 break 4033 4034 if index == self._index: 4035 break 4036 4037 return self.expression(exp.Group, **elements) # type: ignore 4038 4039 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4040 return self.expression( 4041 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4042 ) 4043 4044 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4045 if self._match(TokenType.L_PAREN): 4046 grouping_set = self._parse_csv(self._parse_column) 4047 self._match_r_paren() 4048 return self.expression(exp.Tuple, expressions=grouping_set) 4049 4050 return self._parse_column() 4051 4052 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4053 if not skip_having_token and not self._match(TokenType.HAVING): 4054 return None 4055 return self.expression(exp.Having, this=self._parse_assignment()) 4056 4057 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4058 if not self._match(TokenType.QUALIFY): 4059 return None 4060 return self.expression(exp.Qualify, this=self._parse_assignment()) 4061 4062 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4063 if skip_start_token: 4064 start = None 4065 elif self._match(TokenType.START_WITH): 4066 start = self._parse_assignment() 4067 else: 4068 return None 4069 4070 self._match(TokenType.CONNECT_BY) 4071 nocycle = self._match_text_seq("NOCYCLE") 4072 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4073 exp.Prior, this=self._parse_bitwise() 4074 ) 4075 connect = self._parse_assignment() 4076 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4077 4078 if not start and self._match(TokenType.START_WITH): 4079 start = self._parse_assignment() 4080 4081 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4082 4083 def _parse_name_as_expression(self) -> exp.Alias: 4084 return self.expression( 4085 exp.Alias, 4086 alias=self._parse_id_var(any_token=True), 4087 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4088 ) 4089 4090 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4091 if self._match_text_seq("INTERPOLATE"): 4092 return self._parse_wrapped_csv(self._parse_name_as_expression) 4093 return None 4094 4095 def _parse_order( 4096 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4097 ) -> t.Optional[exp.Expression]: 4098 siblings = None 4099 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4100 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4101 return this 4102 4103 siblings = True 4104 4105 return self.expression( 4106 exp.Order, 4107 this=this, 4108 expressions=self._parse_csv(self._parse_ordered), 4109 siblings=siblings, 4110 ) 4111 4112 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4113 if not self._match(token): 4114 return None 4115 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4116 4117 def _parse_ordered( 4118 self, parse_method: t.Optional[t.Callable] = None 4119 ) -> t.Optional[exp.Ordered]: 4120 this = parse_method() if parse_method else self._parse_assignment() 4121 if not this: 4122 return None 4123 4124 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4125 this = exp.var("ALL") 4126 4127 asc = self._match(TokenType.ASC) 4128 desc = self._match(TokenType.DESC) or (asc and False) 4129 4130 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4131 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4132 4133 nulls_first = is_nulls_first or False 4134 explicitly_null_ordered = is_nulls_first or is_nulls_last 4135 4136 if ( 4137 not explicitly_null_ordered 4138 and ( 4139 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4140 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4141 ) 4142 and self.dialect.NULL_ORDERING != "nulls_are_last" 4143 ): 4144 nulls_first = True 4145 4146 if self._match_text_seq("WITH", "FILL"): 4147 with_fill = self.expression( 4148 exp.WithFill, 4149 **{ # type: ignore 4150 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4151 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4152 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4153 "interpolate": self._parse_interpolate(), 4154 }, 4155 ) 4156 else: 4157 with_fill = None 4158 4159 return self.expression( 4160 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4161 ) 4162 4163 def _parse_limit( 4164 self, 4165 this: t.Optional[exp.Expression] = None, 4166 top: bool = False, 4167 skip_limit_token: bool = False, 4168 ) -> t.Optional[exp.Expression]: 4169 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4170 comments = self._prev_comments 4171 if top: 4172 limit_paren = self._match(TokenType.L_PAREN) 4173 expression = self._parse_term() if limit_paren else self._parse_number() 4174 4175 if limit_paren: 4176 self._match_r_paren() 4177 else: 4178 expression = self._parse_term() 4179 4180 if self._match(TokenType.COMMA): 4181 offset = expression 4182 expression = self._parse_term() 4183 else: 4184 offset = None 4185 4186 limit_exp = self.expression( 4187 exp.Limit, 4188 this=this, 4189 expression=expression, 4190 offset=offset, 4191 comments=comments, 4192 expressions=self._parse_limit_by(), 4193 ) 4194 4195 return limit_exp 4196 4197 if self._match(TokenType.FETCH): 4198 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4199 direction = self._prev.text.upper() if direction else "FIRST" 4200 4201 count = self._parse_field(tokens=self.FETCH_TOKENS) 4202 percent = self._match(TokenType.PERCENT) 4203 4204 self._match_set((TokenType.ROW, TokenType.ROWS)) 4205 4206 only = self._match_text_seq("ONLY") 4207 with_ties = self._match_text_seq("WITH", "TIES") 4208 4209 if only and with_ties: 4210 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4211 4212 return self.expression( 4213 exp.Fetch, 4214 direction=direction, 4215 count=count, 4216 percent=percent, 4217 with_ties=with_ties, 4218 ) 4219 4220 return this 4221 4222 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4223 if not self._match(TokenType.OFFSET): 4224 return this 4225 4226 count = self._parse_term() 4227 self._match_set((TokenType.ROW, TokenType.ROWS)) 4228 4229 return self.expression( 4230 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4231 ) 4232 4233 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4234 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4235 4236 def _parse_locks(self) -> t.List[exp.Lock]: 4237 locks = [] 4238 while True: 4239 if self._match_text_seq("FOR", "UPDATE"): 4240 update = True 4241 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4242 "LOCK", "IN", "SHARE", "MODE" 4243 ): 4244 update = False 4245 else: 4246 break 4247 4248 expressions = None 4249 if self._match_text_seq("OF"): 4250 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4251 4252 wait: t.Optional[bool | exp.Expression] = None 4253 if self._match_text_seq("NOWAIT"): 4254 wait = True 4255 elif self._match_text_seq("WAIT"): 4256 wait = self._parse_primary() 4257 elif self._match_text_seq("SKIP", "LOCKED"): 4258 wait = False 4259 4260 locks.append( 4261 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4262 ) 4263 4264 return locks 4265 4266 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4267 while this and self._match_set(self.SET_OPERATIONS): 4268 token_type = self._prev.token_type 4269 4270 if token_type == TokenType.UNION: 4271 operation: t.Type[exp.SetOperation] = exp.Union 4272 elif token_type == TokenType.EXCEPT: 4273 operation = exp.Except 4274 else: 4275 operation = exp.Intersect 4276 4277 comments = self._prev.comments 4278 4279 if self._match(TokenType.DISTINCT): 4280 distinct: t.Optional[bool] = True 4281 elif self._match(TokenType.ALL): 4282 distinct = False 4283 else: 4284 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4285 if distinct is None: 4286 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4287 4288 by_name = self._match_text_seq("BY", "NAME") 4289 expression = self._parse_select(nested=True, parse_set_operation=False) 4290 4291 this = self.expression( 4292 operation, 4293 comments=comments, 4294 this=this, 4295 distinct=distinct, 4296 by_name=by_name, 4297 expression=expression, 4298 ) 4299 4300 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4301 expression = this.expression 4302 4303 if expression: 4304 for arg in self.SET_OP_MODIFIERS: 4305 expr = expression.args.get(arg) 4306 if expr: 4307 this.set(arg, expr.pop()) 4308 4309 return this 4310 4311 def _parse_expression(self) -> t.Optional[exp.Expression]: 4312 return self._parse_alias(self._parse_assignment()) 4313 4314 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4315 this = self._parse_disjunction() 4316 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4317 # This allows us to parse <non-identifier token> := <expr> 4318 this = exp.column( 4319 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4320 ) 4321 4322 while self._match_set(self.ASSIGNMENT): 4323 this = self.expression( 4324 self.ASSIGNMENT[self._prev.token_type], 4325 this=this, 4326 comments=self._prev_comments, 4327 expression=self._parse_assignment(), 4328 ) 4329 4330 return this 4331 4332 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4333 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4334 4335 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4336 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4337 4338 def _parse_equality(self) -> t.Optional[exp.Expression]: 4339 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4340 4341 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4342 return self._parse_tokens(self._parse_range, self.COMPARISON) 4343 4344 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4345 this = this or self._parse_bitwise() 4346 negate = self._match(TokenType.NOT) 4347 4348 if self._match_set(self.RANGE_PARSERS): 4349 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4350 if not expression: 4351 return this 4352 4353 this = expression 4354 elif self._match(TokenType.ISNULL): 4355 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4356 4357 # Postgres supports ISNULL and NOTNULL for conditions. 4358 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4359 if self._match(TokenType.NOTNULL): 4360 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4361 this = self.expression(exp.Not, this=this) 4362 4363 if negate: 4364 this = self._negate_range(this) 4365 4366 if self._match(TokenType.IS): 4367 this = self._parse_is(this) 4368 4369 return this 4370 4371 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4372 if not this: 4373 return this 4374 4375 return self.expression(exp.Not, this=this) 4376 4377 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4378 index = self._index - 1 4379 negate = self._match(TokenType.NOT) 4380 4381 if self._match_text_seq("DISTINCT", "FROM"): 4382 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4383 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4384 4385 if self._match(TokenType.JSON): 4386 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4387 4388 if self._match_text_seq("WITH"): 4389 _with = True 4390 elif self._match_text_seq("WITHOUT"): 4391 _with = False 4392 else: 4393 _with = None 4394 4395 unique = self._match(TokenType.UNIQUE) 4396 self._match_text_seq("KEYS") 4397 expression: t.Optional[exp.Expression] = self.expression( 4398 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4399 ) 4400 else: 4401 expression = self._parse_primary() or self._parse_null() 4402 if not expression: 4403 self._retreat(index) 4404 return None 4405 4406 this = self.expression(exp.Is, this=this, expression=expression) 4407 return self.expression(exp.Not, this=this) if negate else this 4408 4409 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4410 unnest = self._parse_unnest(with_alias=False) 4411 if unnest: 4412 this = self.expression(exp.In, this=this, unnest=unnest) 4413 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4414 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4415 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4416 4417 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4418 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4419 else: 4420 this = self.expression(exp.In, this=this, expressions=expressions) 4421 4422 if matched_l_paren: 4423 self._match_r_paren(this) 4424 elif not self._match(TokenType.R_BRACKET, expression=this): 4425 self.raise_error("Expecting ]") 4426 else: 4427 this = self.expression(exp.In, this=this, field=self._parse_field()) 4428 4429 return this 4430 4431 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4432 low = self._parse_bitwise() 4433 self._match(TokenType.AND) 4434 high = self._parse_bitwise() 4435 return self.expression(exp.Between, this=this, low=low, high=high) 4436 4437 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4438 if not self._match(TokenType.ESCAPE): 4439 return this 4440 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4441 4442 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4443 index = self._index 4444 4445 if not self._match(TokenType.INTERVAL) and match_interval: 4446 return None 4447 4448 if self._match(TokenType.STRING, advance=False): 4449 this = self._parse_primary() 4450 else: 4451 this = self._parse_term() 4452 4453 if not this or ( 4454 isinstance(this, exp.Column) 4455 and not this.table 4456 and not this.this.quoted 4457 and this.name.upper() == "IS" 4458 ): 4459 self._retreat(index) 4460 return None 4461 4462 unit = self._parse_function() or ( 4463 not self._match(TokenType.ALIAS, advance=False) 4464 and self._parse_var(any_token=True, upper=True) 4465 ) 4466 4467 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4468 # each INTERVAL expression into this canonical form so it's easy to transpile 4469 if this and this.is_number: 4470 this = exp.Literal.string(this.to_py()) 4471 elif this and this.is_string: 4472 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4473 if len(parts) == 1: 4474 if unit: 4475 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4476 self._retreat(self._index - 1) 4477 4478 this = exp.Literal.string(parts[0][0]) 4479 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4480 4481 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4482 unit = self.expression( 4483 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4484 ) 4485 4486 interval = self.expression(exp.Interval, this=this, unit=unit) 4487 4488 index = self._index 4489 self._match(TokenType.PLUS) 4490 4491 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4492 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4493 return self.expression( 4494 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4495 ) 4496 4497 self._retreat(index) 4498 return interval 4499 4500 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4501 this = self._parse_term() 4502 4503 while True: 4504 if self._match_set(self.BITWISE): 4505 this = self.expression( 4506 self.BITWISE[self._prev.token_type], 4507 this=this, 4508 expression=self._parse_term(), 4509 ) 4510 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4511 this = self.expression( 4512 exp.DPipe, 4513 this=this, 4514 expression=self._parse_term(), 4515 safe=not self.dialect.STRICT_STRING_CONCAT, 4516 ) 4517 elif self._match(TokenType.DQMARK): 4518 this = self.expression( 4519 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4520 ) 4521 elif self._match_pair(TokenType.LT, TokenType.LT): 4522 this = self.expression( 4523 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4524 ) 4525 elif self._match_pair(TokenType.GT, TokenType.GT): 4526 this = self.expression( 4527 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4528 ) 4529 else: 4530 break 4531 4532 return this 4533 4534 def _parse_term(self) -> t.Optional[exp.Expression]: 4535 this = self._parse_factor() 4536 4537 while self._match_set(self.TERM): 4538 klass = self.TERM[self._prev.token_type] 4539 comments = self._prev_comments 4540 expression = self._parse_factor() 4541 4542 this = self.expression(klass, this=this, comments=comments, expression=expression) 4543 4544 if isinstance(this, exp.Collate): 4545 expr = this.expression 4546 4547 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4548 # fallback to Identifier / Var 4549 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4550 ident = expr.this 4551 if isinstance(ident, exp.Identifier): 4552 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4553 4554 return this 4555 4556 def _parse_factor(self) -> t.Optional[exp.Expression]: 4557 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4558 this = parse_method() 4559 4560 while self._match_set(self.FACTOR): 4561 klass = self.FACTOR[self._prev.token_type] 4562 comments = self._prev_comments 4563 expression = parse_method() 4564 4565 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4566 self._retreat(self._index - 1) 4567 return this 4568 4569 this = self.expression(klass, this=this, comments=comments, expression=expression) 4570 4571 if isinstance(this, exp.Div): 4572 this.args["typed"] = self.dialect.TYPED_DIVISION 4573 this.args["safe"] = self.dialect.SAFE_DIVISION 4574 4575 return this 4576 4577 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4578 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4579 4580 def _parse_unary(self) -> t.Optional[exp.Expression]: 4581 if self._match_set(self.UNARY_PARSERS): 4582 return self.UNARY_PARSERS[self._prev.token_type](self) 4583 return self._parse_at_time_zone(self._parse_type()) 4584 4585 def _parse_type( 4586 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4587 ) -> t.Optional[exp.Expression]: 4588 interval = parse_interval and self._parse_interval() 4589 if interval: 4590 return interval 4591 4592 index = self._index 4593 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4594 4595 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4596 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4597 if isinstance(data_type, exp.Cast): 4598 # This constructor can contain ops directly after it, for instance struct unnesting: 4599 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4600 return self._parse_column_ops(data_type) 4601 4602 if data_type: 4603 index2 = self._index 4604 this = self._parse_primary() 4605 4606 if isinstance(this, exp.Literal): 4607 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4608 if parser: 4609 return parser(self, this, data_type) 4610 4611 return self.expression(exp.Cast, this=this, to=data_type) 4612 4613 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4614 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4615 # 4616 # If the index difference here is greater than 1, that means the parser itself must have 4617 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4618 # 4619 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4620 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4621 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4622 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4623 # 4624 # In these cases, we don't really want to return the converted type, but instead retreat 4625 # and try to parse a Column or Identifier in the section below. 4626 if data_type.expressions and index2 - index > 1: 4627 self._retreat(index2) 4628 return self._parse_column_ops(data_type) 4629 4630 self._retreat(index) 4631 4632 if fallback_to_identifier: 4633 return self._parse_id_var() 4634 4635 this = self._parse_column() 4636 return this and self._parse_column_ops(this) 4637 4638 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4639 this = self._parse_type() 4640 if not this: 4641 return None 4642 4643 if isinstance(this, exp.Column) and not this.table: 4644 this = exp.var(this.name.upper()) 4645 4646 return self.expression( 4647 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4648 ) 4649 4650 def _parse_types( 4651 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4652 ) -> t.Optional[exp.Expression]: 4653 index = self._index 4654 4655 this: t.Optional[exp.Expression] = None 4656 prefix = self._match_text_seq("SYSUDTLIB", ".") 4657 4658 if not self._match_set(self.TYPE_TOKENS): 4659 identifier = allow_identifiers and self._parse_id_var( 4660 any_token=False, tokens=(TokenType.VAR,) 4661 ) 4662 if isinstance(identifier, exp.Identifier): 4663 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4664 4665 if len(tokens) != 1: 4666 self.raise_error("Unexpected identifier", self._prev) 4667 4668 if tokens[0].token_type in self.TYPE_TOKENS: 4669 self._prev = tokens[0] 4670 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4671 type_name = identifier.name 4672 4673 while self._match(TokenType.DOT): 4674 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4675 4676 this = exp.DataType.build(type_name, udt=True) 4677 else: 4678 self._retreat(self._index - 1) 4679 return None 4680 else: 4681 return None 4682 4683 type_token = self._prev.token_type 4684 4685 if type_token == TokenType.PSEUDO_TYPE: 4686 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4687 4688 if type_token == TokenType.OBJECT_IDENTIFIER: 4689 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4690 4691 # https://materialize.com/docs/sql/types/map/ 4692 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4693 key_type = self._parse_types( 4694 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4695 ) 4696 if not self._match(TokenType.FARROW): 4697 self._retreat(index) 4698 return None 4699 4700 value_type = self._parse_types( 4701 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4702 ) 4703 if not self._match(TokenType.R_BRACKET): 4704 self._retreat(index) 4705 return None 4706 4707 return exp.DataType( 4708 this=exp.DataType.Type.MAP, 4709 expressions=[key_type, value_type], 4710 nested=True, 4711 prefix=prefix, 4712 ) 4713 4714 nested = type_token in self.NESTED_TYPE_TOKENS 4715 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4716 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4717 expressions = None 4718 maybe_func = False 4719 4720 if self._match(TokenType.L_PAREN): 4721 if is_struct: 4722 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4723 elif nested: 4724 expressions = self._parse_csv( 4725 lambda: self._parse_types( 4726 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4727 ) 4728 ) 4729 elif type_token in self.ENUM_TYPE_TOKENS: 4730 expressions = self._parse_csv(self._parse_equality) 4731 elif is_aggregate: 4732 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4733 any_token=False, tokens=(TokenType.VAR,) 4734 ) 4735 if not func_or_ident or not self._match(TokenType.COMMA): 4736 return None 4737 expressions = self._parse_csv( 4738 lambda: self._parse_types( 4739 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4740 ) 4741 ) 4742 expressions.insert(0, func_or_ident) 4743 else: 4744 expressions = self._parse_csv(self._parse_type_size) 4745 4746 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4747 if type_token == TokenType.VECTOR and len(expressions) == 2: 4748 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4749 4750 if not expressions or not self._match(TokenType.R_PAREN): 4751 self._retreat(index) 4752 return None 4753 4754 maybe_func = True 4755 4756 values: t.Optional[t.List[exp.Expression]] = None 4757 4758 if nested and self._match(TokenType.LT): 4759 if is_struct: 4760 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4761 else: 4762 expressions = self._parse_csv( 4763 lambda: self._parse_types( 4764 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4765 ) 4766 ) 4767 4768 if not self._match(TokenType.GT): 4769 self.raise_error("Expecting >") 4770 4771 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4772 values = self._parse_csv(self._parse_assignment) 4773 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4774 4775 if type_token in self.TIMESTAMPS: 4776 if self._match_text_seq("WITH", "TIME", "ZONE"): 4777 maybe_func = False 4778 tz_type = ( 4779 exp.DataType.Type.TIMETZ 4780 if type_token in self.TIMES 4781 else exp.DataType.Type.TIMESTAMPTZ 4782 ) 4783 this = exp.DataType(this=tz_type, expressions=expressions) 4784 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4785 maybe_func = False 4786 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4787 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4788 maybe_func = False 4789 elif type_token == TokenType.INTERVAL: 4790 unit = self._parse_var(upper=True) 4791 if unit: 4792 if self._match_text_seq("TO"): 4793 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4794 4795 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4796 else: 4797 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4798 4799 if maybe_func and check_func: 4800 index2 = self._index 4801 peek = self._parse_string() 4802 4803 if not peek: 4804 self._retreat(index) 4805 return None 4806 4807 self._retreat(index2) 4808 4809 if not this: 4810 if self._match_text_seq("UNSIGNED"): 4811 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4812 if not unsigned_type_token: 4813 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4814 4815 type_token = unsigned_type_token or type_token 4816 4817 this = exp.DataType( 4818 this=exp.DataType.Type[type_token.value], 4819 expressions=expressions, 4820 nested=nested, 4821 prefix=prefix, 4822 ) 4823 4824 # Empty arrays/structs are allowed 4825 if values is not None: 4826 cls = exp.Struct if is_struct else exp.Array 4827 this = exp.cast(cls(expressions=values), this, copy=False) 4828 4829 elif expressions: 4830 this.set("expressions", expressions) 4831 4832 # https://materialize.com/docs/sql/types/list/#type-name 4833 while self._match(TokenType.LIST): 4834 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4835 4836 index = self._index 4837 4838 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4839 matched_array = self._match(TokenType.ARRAY) 4840 4841 while self._curr: 4842 datatype_token = self._prev.token_type 4843 matched_l_bracket = self._match(TokenType.L_BRACKET) 4844 if not matched_l_bracket and not matched_array: 4845 break 4846 4847 matched_array = False 4848 values = self._parse_csv(self._parse_assignment) or None 4849 if ( 4850 values 4851 and not schema 4852 and ( 4853 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4854 ) 4855 ): 4856 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4857 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4858 self._retreat(index) 4859 break 4860 4861 this = exp.DataType( 4862 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4863 ) 4864 self._match(TokenType.R_BRACKET) 4865 4866 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4867 converter = self.TYPE_CONVERTERS.get(this.this) 4868 if converter: 4869 this = converter(t.cast(exp.DataType, this)) 4870 4871 return this 4872 4873 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4874 index = self._index 4875 4876 if ( 4877 self._curr 4878 and self._next 4879 and self._curr.token_type in self.TYPE_TOKENS 4880 and self._next.token_type in self.TYPE_TOKENS 4881 ): 4882 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4883 # type token. Without this, the list will be parsed as a type and we'll eventually crash 4884 this = self._parse_id_var() 4885 else: 4886 this = ( 4887 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4888 or self._parse_id_var() 4889 ) 4890 4891 self._match(TokenType.COLON) 4892 4893 if ( 4894 type_required 4895 and not isinstance(this, exp.DataType) 4896 and not self._match_set(self.TYPE_TOKENS, advance=False) 4897 ): 4898 self._retreat(index) 4899 return self._parse_types() 4900 4901 return self._parse_column_def(this) 4902 4903 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4904 if not self._match_text_seq("AT", "TIME", "ZONE"): 4905 return this 4906 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4907 4908 def _parse_column(self) -> t.Optional[exp.Expression]: 4909 this = self._parse_column_reference() 4910 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4911 4912 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4913 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4914 4915 return column 4916 4917 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4918 this = self._parse_field() 4919 if ( 4920 not this 4921 and self._match(TokenType.VALUES, advance=False) 4922 and self.VALUES_FOLLOWED_BY_PAREN 4923 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4924 ): 4925 this = self._parse_id_var() 4926 4927 if isinstance(this, exp.Identifier): 4928 # We bubble up comments from the Identifier to the Column 4929 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4930 4931 return this 4932 4933 def _parse_colon_as_variant_extract( 4934 self, this: t.Optional[exp.Expression] 4935 ) -> t.Optional[exp.Expression]: 4936 casts = [] 4937 json_path = [] 4938 4939 while self._match(TokenType.COLON): 4940 start_index = self._index 4941 4942 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4943 path = self._parse_column_ops( 4944 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4945 ) 4946 4947 # The cast :: operator has a lower precedence than the extraction operator :, so 4948 # we rearrange the AST appropriately to avoid casting the JSON path 4949 while isinstance(path, exp.Cast): 4950 casts.append(path.to) 4951 path = path.this 4952 4953 if casts: 4954 dcolon_offset = next( 4955 i 4956 for i, t in enumerate(self._tokens[start_index:]) 4957 if t.token_type == TokenType.DCOLON 4958 ) 4959 end_token = self._tokens[start_index + dcolon_offset - 1] 4960 else: 4961 end_token = self._prev 4962 4963 if path: 4964 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4965 4966 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4967 # Databricks transforms it back to the colon/dot notation 4968 if json_path: 4969 this = self.expression( 4970 exp.JSONExtract, 4971 this=this, 4972 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4973 variant_extract=True, 4974 ) 4975 4976 while casts: 4977 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4978 4979 return this 4980 4981 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4982 return self._parse_types() 4983 4984 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4985 this = self._parse_bracket(this) 4986 4987 while self._match_set(self.COLUMN_OPERATORS): 4988 op_token = self._prev.token_type 4989 op = self.COLUMN_OPERATORS.get(op_token) 4990 4991 if op_token == TokenType.DCOLON: 4992 field = self._parse_dcolon() 4993 if not field: 4994 self.raise_error("Expected type") 4995 elif op and self._curr: 4996 field = self._parse_column_reference() 4997 else: 4998 field = self._parse_field(any_token=True, anonymous_func=True) 4999 5000 if isinstance(field, exp.Func) and this: 5001 # bigquery allows function calls like x.y.count(...) 5002 # SAFE.SUBSTR(...) 5003 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5004 this = exp.replace_tree( 5005 this, 5006 lambda n: ( 5007 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5008 if n.table 5009 else n.this 5010 ) 5011 if isinstance(n, exp.Column) 5012 else n, 5013 ) 5014 5015 if op: 5016 this = op(self, this, field) 5017 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5018 this = self.expression( 5019 exp.Column, 5020 this=field, 5021 table=this.this, 5022 db=this.args.get("table"), 5023 catalog=this.args.get("db"), 5024 ) 5025 else: 5026 this = self.expression(exp.Dot, this=this, expression=field) 5027 5028 this = self._parse_bracket(this) 5029 5030 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5031 5032 def _parse_primary(self) -> t.Optional[exp.Expression]: 5033 if self._match_set(self.PRIMARY_PARSERS): 5034 token_type = self._prev.token_type 5035 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5036 5037 if token_type == TokenType.STRING: 5038 expressions = [primary] 5039 while self._match(TokenType.STRING): 5040 expressions.append(exp.Literal.string(self._prev.text)) 5041 5042 if len(expressions) > 1: 5043 return self.expression(exp.Concat, expressions=expressions) 5044 5045 return primary 5046 5047 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5048 return exp.Literal.number(f"0.{self._prev.text}") 5049 5050 if self._match(TokenType.L_PAREN): 5051 comments = self._prev_comments 5052 query = self._parse_select() 5053 5054 if query: 5055 expressions = [query] 5056 else: 5057 expressions = self._parse_expressions() 5058 5059 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5060 5061 if not this and self._match(TokenType.R_PAREN, advance=False): 5062 this = self.expression(exp.Tuple) 5063 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5064 this = self._parse_subquery(this=this, parse_alias=False) 5065 elif isinstance(this, exp.Subquery): 5066 this = self._parse_subquery( 5067 this=self._parse_set_operations(this), parse_alias=False 5068 ) 5069 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5070 this = self.expression(exp.Tuple, expressions=expressions) 5071 else: 5072 this = self.expression(exp.Paren, this=this) 5073 5074 if this: 5075 this.add_comments(comments) 5076 5077 self._match_r_paren(expression=this) 5078 return this 5079 5080 return None 5081 5082 def _parse_field( 5083 self, 5084 any_token: bool = False, 5085 tokens: t.Optional[t.Collection[TokenType]] = None, 5086 anonymous_func: bool = False, 5087 ) -> t.Optional[exp.Expression]: 5088 if anonymous_func: 5089 field = ( 5090 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5091 or self._parse_primary() 5092 ) 5093 else: 5094 field = self._parse_primary() or self._parse_function( 5095 anonymous=anonymous_func, any_token=any_token 5096 ) 5097 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5098 5099 def _parse_function( 5100 self, 5101 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5102 anonymous: bool = False, 5103 optional_parens: bool = True, 5104 any_token: bool = False, 5105 ) -> t.Optional[exp.Expression]: 5106 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5107 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5108 fn_syntax = False 5109 if ( 5110 self._match(TokenType.L_BRACE, advance=False) 5111 and self._next 5112 and self._next.text.upper() == "FN" 5113 ): 5114 self._advance(2) 5115 fn_syntax = True 5116 5117 func = self._parse_function_call( 5118 functions=functions, 5119 anonymous=anonymous, 5120 optional_parens=optional_parens, 5121 any_token=any_token, 5122 ) 5123 5124 if fn_syntax: 5125 self._match(TokenType.R_BRACE) 5126 5127 return func 5128 5129 def _parse_function_call( 5130 self, 5131 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5132 anonymous: bool = False, 5133 optional_parens: bool = True, 5134 any_token: bool = False, 5135 ) -> t.Optional[exp.Expression]: 5136 if not self._curr: 5137 return None 5138 5139 comments = self._curr.comments 5140 token_type = self._curr.token_type 5141 this = self._curr.text 5142 upper = this.upper() 5143 5144 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5145 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5146 self._advance() 5147 return self._parse_window(parser(self)) 5148 5149 if not self._next or self._next.token_type != TokenType.L_PAREN: 5150 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5151 self._advance() 5152 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5153 5154 return None 5155 5156 if any_token: 5157 if token_type in self.RESERVED_TOKENS: 5158 return None 5159 elif token_type not in self.FUNC_TOKENS: 5160 return None 5161 5162 self._advance(2) 5163 5164 parser = self.FUNCTION_PARSERS.get(upper) 5165 if parser and not anonymous: 5166 this = parser(self) 5167 else: 5168 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5169 5170 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5171 this = self.expression(subquery_predicate, this=self._parse_select()) 5172 self._match_r_paren() 5173 return this 5174 5175 if functions is None: 5176 functions = self.FUNCTIONS 5177 5178 function = functions.get(upper) 5179 5180 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5181 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5182 5183 if alias: 5184 args = self._kv_to_prop_eq(args) 5185 5186 if function and not anonymous: 5187 if "dialect" in function.__code__.co_varnames: 5188 func = function(args, dialect=self.dialect) 5189 else: 5190 func = function(args) 5191 5192 func = self.validate_expression(func, args) 5193 if not self.dialect.NORMALIZE_FUNCTIONS: 5194 func.meta["name"] = this 5195 5196 this = func 5197 else: 5198 if token_type == TokenType.IDENTIFIER: 5199 this = exp.Identifier(this=this, quoted=True) 5200 this = self.expression(exp.Anonymous, this=this, expressions=args) 5201 5202 if isinstance(this, exp.Expression): 5203 this.add_comments(comments) 5204 5205 self._match_r_paren(this) 5206 return self._parse_window(this) 5207 5208 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5209 return expression 5210 5211 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5212 transformed = [] 5213 5214 for index, e in enumerate(expressions): 5215 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5216 if isinstance(e, exp.Alias): 5217 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5218 5219 if not isinstance(e, exp.PropertyEQ): 5220 e = self.expression( 5221 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5222 ) 5223 5224 if isinstance(e.this, exp.Column): 5225 e.this.replace(e.this.this) 5226 else: 5227 e = self._to_prop_eq(e, index) 5228 5229 transformed.append(e) 5230 5231 return transformed 5232 5233 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5234 return self._parse_column_def(self._parse_id_var()) 5235 5236 def _parse_user_defined_function( 5237 self, kind: t.Optional[TokenType] = None 5238 ) -> t.Optional[exp.Expression]: 5239 this = self._parse_id_var() 5240 5241 while self._match(TokenType.DOT): 5242 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5243 5244 if not self._match(TokenType.L_PAREN): 5245 return this 5246 5247 expressions = self._parse_csv(self._parse_function_parameter) 5248 self._match_r_paren() 5249 return self.expression( 5250 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5251 ) 5252 5253 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5254 literal = self._parse_primary() 5255 if literal: 5256 return self.expression(exp.Introducer, this=token.text, expression=literal) 5257 5258 return self.expression(exp.Identifier, this=token.text) 5259 5260 def _parse_session_parameter(self) -> exp.SessionParameter: 5261 kind = None 5262 this = self._parse_id_var() or self._parse_primary() 5263 5264 if this and self._match(TokenType.DOT): 5265 kind = this.name 5266 this = self._parse_var() or self._parse_primary() 5267 5268 return self.expression(exp.SessionParameter, this=this, kind=kind) 5269 5270 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5271 return self._parse_id_var() 5272 5273 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5274 index = self._index 5275 5276 if self._match(TokenType.L_PAREN): 5277 expressions = t.cast( 5278 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5279 ) 5280 5281 if not self._match(TokenType.R_PAREN): 5282 self._retreat(index) 5283 else: 5284 expressions = [self._parse_lambda_arg()] 5285 5286 if self._match_set(self.LAMBDAS): 5287 return self.LAMBDAS[self._prev.token_type](self, expressions) 5288 5289 self._retreat(index) 5290 5291 this: t.Optional[exp.Expression] 5292 5293 if self._match(TokenType.DISTINCT): 5294 this = self.expression( 5295 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5296 ) 5297 else: 5298 this = self._parse_select_or_expression(alias=alias) 5299 5300 return self._parse_limit( 5301 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5302 ) 5303 5304 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5305 index = self._index 5306 if not self._match(TokenType.L_PAREN): 5307 return this 5308 5309 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5310 # expr can be of both types 5311 if self._match_set(self.SELECT_START_TOKENS): 5312 self._retreat(index) 5313 return this 5314 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5315 self._match_r_paren() 5316 return self.expression(exp.Schema, this=this, expressions=args) 5317 5318 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5319 return self._parse_column_def(self._parse_field(any_token=True)) 5320 5321 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5322 # column defs are not really columns, they're identifiers 5323 if isinstance(this, exp.Column): 5324 this = this.this 5325 5326 kind = self._parse_types(schema=True) 5327 5328 if self._match_text_seq("FOR", "ORDINALITY"): 5329 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5330 5331 constraints: t.List[exp.Expression] = [] 5332 5333 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5334 ("ALIAS", "MATERIALIZED") 5335 ): 5336 persisted = self._prev.text.upper() == "MATERIALIZED" 5337 constraints.append( 5338 self.expression( 5339 exp.ComputedColumnConstraint, 5340 this=self._parse_assignment(), 5341 persisted=persisted or self._match_text_seq("PERSISTED"), 5342 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5343 ) 5344 ) 5345 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5346 self._match(TokenType.ALIAS) 5347 constraints.append( 5348 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 5349 ) 5350 5351 while True: 5352 constraint = self._parse_column_constraint() 5353 if not constraint: 5354 break 5355 constraints.append(constraint) 5356 5357 if not kind and not constraints: 5358 return this 5359 5360 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5361 5362 def _parse_auto_increment( 5363 self, 5364 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5365 start = None 5366 increment = None 5367 5368 if self._match(TokenType.L_PAREN, advance=False): 5369 args = self._parse_wrapped_csv(self._parse_bitwise) 5370 start = seq_get(args, 0) 5371 increment = seq_get(args, 1) 5372 elif self._match_text_seq("START"): 5373 start = self._parse_bitwise() 5374 self._match_text_seq("INCREMENT") 5375 increment = self._parse_bitwise() 5376 5377 if start and increment: 5378 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5379 5380 return exp.AutoIncrementColumnConstraint() 5381 5382 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5383 if not self._match_text_seq("REFRESH"): 5384 self._retreat(self._index - 1) 5385 return None 5386 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5387 5388 def _parse_compress(self) -> exp.CompressColumnConstraint: 5389 if self._match(TokenType.L_PAREN, advance=False): 5390 return self.expression( 5391 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5392 ) 5393 5394 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5395 5396 def _parse_generated_as_identity( 5397 self, 5398 ) -> ( 5399 exp.GeneratedAsIdentityColumnConstraint 5400 | exp.ComputedColumnConstraint 5401 | exp.GeneratedAsRowColumnConstraint 5402 ): 5403 if self._match_text_seq("BY", "DEFAULT"): 5404 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5405 this = self.expression( 5406 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5407 ) 5408 else: 5409 self._match_text_seq("ALWAYS") 5410 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5411 5412 self._match(TokenType.ALIAS) 5413 5414 if self._match_text_seq("ROW"): 5415 start = self._match_text_seq("START") 5416 if not start: 5417 self._match(TokenType.END) 5418 hidden = self._match_text_seq("HIDDEN") 5419 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5420 5421 identity = self._match_text_seq("IDENTITY") 5422 5423 if self._match(TokenType.L_PAREN): 5424 if self._match(TokenType.START_WITH): 5425 this.set("start", self._parse_bitwise()) 5426 if self._match_text_seq("INCREMENT", "BY"): 5427 this.set("increment", self._parse_bitwise()) 5428 if self._match_text_seq("MINVALUE"): 5429 this.set("minvalue", self._parse_bitwise()) 5430 if self._match_text_seq("MAXVALUE"): 5431 this.set("maxvalue", self._parse_bitwise()) 5432 5433 if self._match_text_seq("CYCLE"): 5434 this.set("cycle", True) 5435 elif self._match_text_seq("NO", "CYCLE"): 5436 this.set("cycle", False) 5437 5438 if not identity: 5439 this.set("expression", self._parse_range()) 5440 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5441 args = self._parse_csv(self._parse_bitwise) 5442 this.set("start", seq_get(args, 0)) 5443 this.set("increment", seq_get(args, 1)) 5444 5445 self._match_r_paren() 5446 5447 return this 5448 5449 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5450 self._match_text_seq("LENGTH") 5451 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5452 5453 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5454 if self._match_text_seq("NULL"): 5455 return self.expression(exp.NotNullColumnConstraint) 5456 if self._match_text_seq("CASESPECIFIC"): 5457 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5458 if self._match_text_seq("FOR", "REPLICATION"): 5459 return self.expression(exp.NotForReplicationColumnConstraint) 5460 return None 5461 5462 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5463 if self._match(TokenType.CONSTRAINT): 5464 this = self._parse_id_var() 5465 else: 5466 this = None 5467 5468 if self._match_texts(self.CONSTRAINT_PARSERS): 5469 return self.expression( 5470 exp.ColumnConstraint, 5471 this=this, 5472 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5473 ) 5474 5475 return this 5476 5477 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5478 if not self._match(TokenType.CONSTRAINT): 5479 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5480 5481 return self.expression( 5482 exp.Constraint, 5483 this=self._parse_id_var(), 5484 expressions=self._parse_unnamed_constraints(), 5485 ) 5486 5487 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5488 constraints = [] 5489 while True: 5490 constraint = self._parse_unnamed_constraint() or self._parse_function() 5491 if not constraint: 5492 break 5493 constraints.append(constraint) 5494 5495 return constraints 5496 5497 def _parse_unnamed_constraint( 5498 self, constraints: t.Optional[t.Collection[str]] = None 5499 ) -> t.Optional[exp.Expression]: 5500 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5501 constraints or self.CONSTRAINT_PARSERS 5502 ): 5503 return None 5504 5505 constraint = self._prev.text.upper() 5506 if constraint not in self.CONSTRAINT_PARSERS: 5507 self.raise_error(f"No parser found for schema constraint {constraint}.") 5508 5509 return self.CONSTRAINT_PARSERS[constraint](self) 5510 5511 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5512 return self._parse_id_var(any_token=False) 5513 5514 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5515 self._match_text_seq("KEY") 5516 return self.expression( 5517 exp.UniqueColumnConstraint, 5518 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5519 this=self._parse_schema(self._parse_unique_key()), 5520 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5521 on_conflict=self._parse_on_conflict(), 5522 ) 5523 5524 def _parse_key_constraint_options(self) -> t.List[str]: 5525 options = [] 5526 while True: 5527 if not self._curr: 5528 break 5529 5530 if self._match(TokenType.ON): 5531 action = None 5532 on = self._advance_any() and self._prev.text 5533 5534 if self._match_text_seq("NO", "ACTION"): 5535 action = "NO ACTION" 5536 elif self._match_text_seq("CASCADE"): 5537 action = "CASCADE" 5538 elif self._match_text_seq("RESTRICT"): 5539 action = "RESTRICT" 5540 elif self._match_pair(TokenType.SET, TokenType.NULL): 5541 action = "SET NULL" 5542 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5543 action = "SET DEFAULT" 5544 else: 5545 self.raise_error("Invalid key constraint") 5546 5547 options.append(f"ON {on} {action}") 5548 else: 5549 var = self._parse_var_from_options( 5550 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5551 ) 5552 if not var: 5553 break 5554 options.append(var.name) 5555 5556 return options 5557 5558 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5559 if match and not self._match(TokenType.REFERENCES): 5560 return None 5561 5562 expressions = None 5563 this = self._parse_table(schema=True) 5564 options = self._parse_key_constraint_options() 5565 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5566 5567 def _parse_foreign_key(self) -> exp.ForeignKey: 5568 expressions = self._parse_wrapped_id_vars() 5569 reference = self._parse_references() 5570 options = {} 5571 5572 while self._match(TokenType.ON): 5573 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5574 self.raise_error("Expected DELETE or UPDATE") 5575 5576 kind = self._prev.text.lower() 5577 5578 if self._match_text_seq("NO", "ACTION"): 5579 action = "NO ACTION" 5580 elif self._match(TokenType.SET): 5581 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5582 action = "SET " + self._prev.text.upper() 5583 else: 5584 self._advance() 5585 action = self._prev.text.upper() 5586 5587 options[kind] = action 5588 5589 return self.expression( 5590 exp.ForeignKey, 5591 expressions=expressions, 5592 reference=reference, 5593 **options, # type: ignore 5594 ) 5595 5596 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5597 return self._parse_field() 5598 5599 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5600 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5601 self._retreat(self._index - 1) 5602 return None 5603 5604 id_vars = self._parse_wrapped_id_vars() 5605 return self.expression( 5606 exp.PeriodForSystemTimeConstraint, 5607 this=seq_get(id_vars, 0), 5608 expression=seq_get(id_vars, 1), 5609 ) 5610 5611 def _parse_primary_key( 5612 self, wrapped_optional: bool = False, in_props: bool = False 5613 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5614 desc = ( 5615 self._match_set((TokenType.ASC, TokenType.DESC)) 5616 and self._prev.token_type == TokenType.DESC 5617 ) 5618 5619 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5620 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5621 5622 expressions = self._parse_wrapped_csv( 5623 self._parse_primary_key_part, optional=wrapped_optional 5624 ) 5625 options = self._parse_key_constraint_options() 5626 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5627 5628 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5629 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5630 5631 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5632 """ 5633 Parses a datetime column in ODBC format. We parse the column into the corresponding 5634 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5635 same as we did for `DATE('yyyy-mm-dd')`. 5636 5637 Reference: 5638 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5639 """ 5640 self._match(TokenType.VAR) 5641 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5642 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5643 if not self._match(TokenType.R_BRACE): 5644 self.raise_error("Expected }") 5645 return expression 5646 5647 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5648 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5649 return this 5650 5651 bracket_kind = self._prev.token_type 5652 if ( 5653 bracket_kind == TokenType.L_BRACE 5654 and self._curr 5655 and self._curr.token_type == TokenType.VAR 5656 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5657 ): 5658 return self._parse_odbc_datetime_literal() 5659 5660 expressions = self._parse_csv( 5661 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5662 ) 5663 5664 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5665 self.raise_error("Expected ]") 5666 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5667 self.raise_error("Expected }") 5668 5669 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5670 if bracket_kind == TokenType.L_BRACE: 5671 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5672 elif not this: 5673 this = build_array_constructor( 5674 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5675 ) 5676 else: 5677 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5678 if constructor_type: 5679 return build_array_constructor( 5680 constructor_type, 5681 args=expressions, 5682 bracket_kind=bracket_kind, 5683 dialect=self.dialect, 5684 ) 5685 5686 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5687 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5688 5689 self._add_comments(this) 5690 return self._parse_bracket(this) 5691 5692 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5693 if self._match(TokenType.COLON): 5694 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5695 return this 5696 5697 def _parse_case(self) -> t.Optional[exp.Expression]: 5698 ifs = [] 5699 default = None 5700 5701 comments = self._prev_comments 5702 expression = self._parse_assignment() 5703 5704 while self._match(TokenType.WHEN): 5705 this = self._parse_assignment() 5706 self._match(TokenType.THEN) 5707 then = self._parse_assignment() 5708 ifs.append(self.expression(exp.If, this=this, true=then)) 5709 5710 if self._match(TokenType.ELSE): 5711 default = self._parse_assignment() 5712 5713 if not self._match(TokenType.END): 5714 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5715 default = exp.column("interval") 5716 else: 5717 self.raise_error("Expected END after CASE", self._prev) 5718 5719 return self.expression( 5720 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5721 ) 5722 5723 def _parse_if(self) -> t.Optional[exp.Expression]: 5724 if self._match(TokenType.L_PAREN): 5725 args = self._parse_csv(self._parse_assignment) 5726 this = self.validate_expression(exp.If.from_arg_list(args), args) 5727 self._match_r_paren() 5728 else: 5729 index = self._index - 1 5730 5731 if self.NO_PAREN_IF_COMMANDS and index == 0: 5732 return self._parse_as_command(self._prev) 5733 5734 condition = self._parse_assignment() 5735 5736 if not condition: 5737 self._retreat(index) 5738 return None 5739 5740 self._match(TokenType.THEN) 5741 true = self._parse_assignment() 5742 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5743 self._match(TokenType.END) 5744 this = self.expression(exp.If, this=condition, true=true, false=false) 5745 5746 return this 5747 5748 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5749 if not self._match_text_seq("VALUE", "FOR"): 5750 self._retreat(self._index - 1) 5751 return None 5752 5753 return self.expression( 5754 exp.NextValueFor, 5755 this=self._parse_column(), 5756 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5757 ) 5758 5759 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5760 this = self._parse_function() or self._parse_var_or_string(upper=True) 5761 5762 if self._match(TokenType.FROM): 5763 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5764 5765 if not self._match(TokenType.COMMA): 5766 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5767 5768 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5769 5770 def _parse_gap_fill(self) -> exp.GapFill: 5771 self._match(TokenType.TABLE) 5772 this = self._parse_table() 5773 5774 self._match(TokenType.COMMA) 5775 args = [this, *self._parse_csv(self._parse_lambda)] 5776 5777 gap_fill = exp.GapFill.from_arg_list(args) 5778 return self.validate_expression(gap_fill, args) 5779 5780 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5781 this = self._parse_assignment() 5782 5783 if not self._match(TokenType.ALIAS): 5784 if self._match(TokenType.COMMA): 5785 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5786 5787 self.raise_error("Expected AS after CAST") 5788 5789 fmt = None 5790 to = self._parse_types() 5791 5792 if self._match(TokenType.FORMAT): 5793 fmt_string = self._parse_string() 5794 fmt = self._parse_at_time_zone(fmt_string) 5795 5796 if not to: 5797 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5798 if to.this in exp.DataType.TEMPORAL_TYPES: 5799 this = self.expression( 5800 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5801 this=this, 5802 format=exp.Literal.string( 5803 format_time( 5804 fmt_string.this if fmt_string else "", 5805 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5806 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5807 ) 5808 ), 5809 safe=safe, 5810 ) 5811 5812 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5813 this.set("zone", fmt.args["zone"]) 5814 return this 5815 elif not to: 5816 self.raise_error("Expected TYPE after CAST") 5817 elif isinstance(to, exp.Identifier): 5818 to = exp.DataType.build(to.name, udt=True) 5819 elif to.this == exp.DataType.Type.CHAR: 5820 if self._match(TokenType.CHARACTER_SET): 5821 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5822 5823 return self.expression( 5824 exp.Cast if strict else exp.TryCast, 5825 this=this, 5826 to=to, 5827 format=fmt, 5828 safe=safe, 5829 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5830 ) 5831 5832 def _parse_string_agg(self) -> exp.Expression: 5833 if self._match(TokenType.DISTINCT): 5834 args: t.List[t.Optional[exp.Expression]] = [ 5835 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5836 ] 5837 if self._match(TokenType.COMMA): 5838 args.extend(self._parse_csv(self._parse_assignment)) 5839 else: 5840 args = self._parse_csv(self._parse_assignment) # type: ignore 5841 5842 index = self._index 5843 if not self._match(TokenType.R_PAREN) and args: 5844 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5845 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5846 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5847 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5848 5849 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5850 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5851 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5852 if not self._match_text_seq("WITHIN", "GROUP"): 5853 self._retreat(index) 5854 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5855 5856 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5857 order = self._parse_order(this=seq_get(args, 0)) 5858 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5859 5860 def _parse_convert( 5861 self, strict: bool, safe: t.Optional[bool] = None 5862 ) -> t.Optional[exp.Expression]: 5863 this = self._parse_bitwise() 5864 5865 if self._match(TokenType.USING): 5866 to: t.Optional[exp.Expression] = self.expression( 5867 exp.CharacterSet, this=self._parse_var() 5868 ) 5869 elif self._match(TokenType.COMMA): 5870 to = self._parse_types() 5871 else: 5872 to = None 5873 5874 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5875 5876 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5877 """ 5878 There are generally two variants of the DECODE function: 5879 5880 - DECODE(bin, charset) 5881 - DECODE(expression, search, result [, search, result] ... [, default]) 5882 5883 The second variant will always be parsed into a CASE expression. Note that NULL 5884 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5885 instead of relying on pattern matching. 5886 """ 5887 args = self._parse_csv(self._parse_assignment) 5888 5889 if len(args) < 3: 5890 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5891 5892 expression, *expressions = args 5893 if not expression: 5894 return None 5895 5896 ifs = [] 5897 for search, result in zip(expressions[::2], expressions[1::2]): 5898 if not search or not result: 5899 return None 5900 5901 if isinstance(search, exp.Literal): 5902 ifs.append( 5903 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5904 ) 5905 elif isinstance(search, exp.Null): 5906 ifs.append( 5907 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5908 ) 5909 else: 5910 cond = exp.or_( 5911 exp.EQ(this=expression.copy(), expression=search), 5912 exp.and_( 5913 exp.Is(this=expression.copy(), expression=exp.Null()), 5914 exp.Is(this=search.copy(), expression=exp.Null()), 5915 copy=False, 5916 ), 5917 copy=False, 5918 ) 5919 ifs.append(exp.If(this=cond, true=result)) 5920 5921 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5922 5923 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5924 self._match_text_seq("KEY") 5925 key = self._parse_column() 5926 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5927 self._match_text_seq("VALUE") 5928 value = self._parse_bitwise() 5929 5930 if not key and not value: 5931 return None 5932 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5933 5934 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5935 if not this or not self._match_text_seq("FORMAT", "JSON"): 5936 return this 5937 5938 return self.expression(exp.FormatJson, this=this) 5939 5940 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 5941 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 5942 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 5943 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 5944 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 5945 else: 5946 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 5947 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 5948 5949 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 5950 5951 if not empty and not error and not null: 5952 return None 5953 5954 return self.expression( 5955 exp.OnCondition, 5956 empty=empty, 5957 error=error, 5958 null=null, 5959 ) 5960 5961 def _parse_on_handling( 5962 self, on: str, *values: str 5963 ) -> t.Optional[str] | t.Optional[exp.Expression]: 5964 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 5965 for value in values: 5966 if self._match_text_seq(value, "ON", on): 5967 return f"{value} ON {on}" 5968 5969 index = self._index 5970 if self._match(TokenType.DEFAULT): 5971 default_value = self._parse_bitwise() 5972 if self._match_text_seq("ON", on): 5973 return default_value 5974 5975 self._retreat(index) 5976 5977 return None 5978 5979 @t.overload 5980 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5981 5982 @t.overload 5983 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5984 5985 def _parse_json_object(self, agg=False): 5986 star = self._parse_star() 5987 expressions = ( 5988 [star] 5989 if star 5990 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5991 ) 5992 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5993 5994 unique_keys = None 5995 if self._match_text_seq("WITH", "UNIQUE"): 5996 unique_keys = True 5997 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5998 unique_keys = False 5999 6000 self._match_text_seq("KEYS") 6001 6002 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6003 self._parse_type() 6004 ) 6005 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6006 6007 return self.expression( 6008 exp.JSONObjectAgg if agg else exp.JSONObject, 6009 expressions=expressions, 6010 null_handling=null_handling, 6011 unique_keys=unique_keys, 6012 return_type=return_type, 6013 encoding=encoding, 6014 ) 6015 6016 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6017 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6018 if not self._match_text_seq("NESTED"): 6019 this = self._parse_id_var() 6020 kind = self._parse_types(allow_identifiers=False) 6021 nested = None 6022 else: 6023 this = None 6024 kind = None 6025 nested = True 6026 6027 path = self._match_text_seq("PATH") and self._parse_string() 6028 nested_schema = nested and self._parse_json_schema() 6029 6030 return self.expression( 6031 exp.JSONColumnDef, 6032 this=this, 6033 kind=kind, 6034 path=path, 6035 nested_schema=nested_schema, 6036 ) 6037 6038 def _parse_json_schema(self) -> exp.JSONSchema: 6039 self._match_text_seq("COLUMNS") 6040 return self.expression( 6041 exp.JSONSchema, 6042 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6043 ) 6044 6045 def _parse_json_table(self) -> exp.JSONTable: 6046 this = self._parse_format_json(self._parse_bitwise()) 6047 path = self._match(TokenType.COMMA) and self._parse_string() 6048 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6049 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6050 schema = self._parse_json_schema() 6051 6052 return exp.JSONTable( 6053 this=this, 6054 schema=schema, 6055 path=path, 6056 error_handling=error_handling, 6057 empty_handling=empty_handling, 6058 ) 6059 6060 def _parse_match_against(self) -> exp.MatchAgainst: 6061 expressions = self._parse_csv(self._parse_column) 6062 6063 self._match_text_seq(")", "AGAINST", "(") 6064 6065 this = self._parse_string() 6066 6067 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6068 modifier = "IN NATURAL LANGUAGE MODE" 6069 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6070 modifier = f"{modifier} WITH QUERY EXPANSION" 6071 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6072 modifier = "IN BOOLEAN MODE" 6073 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6074 modifier = "WITH QUERY EXPANSION" 6075 else: 6076 modifier = None 6077 6078 return self.expression( 6079 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6080 ) 6081 6082 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6083 def _parse_open_json(self) -> exp.OpenJSON: 6084 this = self._parse_bitwise() 6085 path = self._match(TokenType.COMMA) and self._parse_string() 6086 6087 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6088 this = self._parse_field(any_token=True) 6089 kind = self._parse_types() 6090 path = self._parse_string() 6091 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6092 6093 return self.expression( 6094 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6095 ) 6096 6097 expressions = None 6098 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6099 self._match_l_paren() 6100 expressions = self._parse_csv(_parse_open_json_column_def) 6101 6102 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6103 6104 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6105 args = self._parse_csv(self._parse_bitwise) 6106 6107 if self._match(TokenType.IN): 6108 return self.expression( 6109 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6110 ) 6111 6112 if haystack_first: 6113 haystack = seq_get(args, 0) 6114 needle = seq_get(args, 1) 6115 else: 6116 needle = seq_get(args, 0) 6117 haystack = seq_get(args, 1) 6118 6119 return self.expression( 6120 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6121 ) 6122 6123 def _parse_predict(self) -> exp.Predict: 6124 self._match_text_seq("MODEL") 6125 this = self._parse_table() 6126 6127 self._match(TokenType.COMMA) 6128 self._match_text_seq("TABLE") 6129 6130 return self.expression( 6131 exp.Predict, 6132 this=this, 6133 expression=self._parse_table(), 6134 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6135 ) 6136 6137 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6138 args = self._parse_csv(self._parse_table) 6139 return exp.JoinHint(this=func_name.upper(), expressions=args) 6140 6141 def _parse_substring(self) -> exp.Substring: 6142 # Postgres supports the form: substring(string [from int] [for int]) 6143 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6144 6145 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6146 6147 if self._match(TokenType.FROM): 6148 args.append(self._parse_bitwise()) 6149 if self._match(TokenType.FOR): 6150 if len(args) == 1: 6151 args.append(exp.Literal.number(1)) 6152 args.append(self._parse_bitwise()) 6153 6154 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6155 6156 def _parse_trim(self) -> exp.Trim: 6157 # https://www.w3resource.com/sql/character-functions/trim.php 6158 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6159 6160 position = None 6161 collation = None 6162 expression = None 6163 6164 if self._match_texts(self.TRIM_TYPES): 6165 position = self._prev.text.upper() 6166 6167 this = self._parse_bitwise() 6168 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6169 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6170 expression = self._parse_bitwise() 6171 6172 if invert_order: 6173 this, expression = expression, this 6174 6175 if self._match(TokenType.COLLATE): 6176 collation = self._parse_bitwise() 6177 6178 return self.expression( 6179 exp.Trim, this=this, position=position, expression=expression, collation=collation 6180 ) 6181 6182 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6183 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6184 6185 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6186 return self._parse_window(self._parse_id_var(), alias=True) 6187 6188 def _parse_respect_or_ignore_nulls( 6189 self, this: t.Optional[exp.Expression] 6190 ) -> t.Optional[exp.Expression]: 6191 if self._match_text_seq("IGNORE", "NULLS"): 6192 return self.expression(exp.IgnoreNulls, this=this) 6193 if self._match_text_seq("RESPECT", "NULLS"): 6194 return self.expression(exp.RespectNulls, this=this) 6195 return this 6196 6197 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6198 if self._match(TokenType.HAVING): 6199 self._match_texts(("MAX", "MIN")) 6200 max = self._prev.text.upper() != "MIN" 6201 return self.expression( 6202 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6203 ) 6204 6205 return this 6206 6207 def _parse_window( 6208 self, this: t.Optional[exp.Expression], alias: bool = False 6209 ) -> t.Optional[exp.Expression]: 6210 func = this 6211 comments = func.comments if isinstance(func, exp.Expression) else None 6212 6213 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6214 self._match(TokenType.WHERE) 6215 this = self.expression( 6216 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6217 ) 6218 self._match_r_paren() 6219 6220 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6221 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6222 if self._match_text_seq("WITHIN", "GROUP"): 6223 order = self._parse_wrapped(self._parse_order) 6224 this = self.expression(exp.WithinGroup, this=this, expression=order) 6225 6226 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6227 # Some dialects choose to implement and some do not. 6228 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6229 6230 # There is some code above in _parse_lambda that handles 6231 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6232 6233 # The below changes handle 6234 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6235 6236 # Oracle allows both formats 6237 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6238 # and Snowflake chose to do the same for familiarity 6239 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6240 if isinstance(this, exp.AggFunc): 6241 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6242 6243 if ignore_respect and ignore_respect is not this: 6244 ignore_respect.replace(ignore_respect.this) 6245 this = self.expression(ignore_respect.__class__, this=this) 6246 6247 this = self._parse_respect_or_ignore_nulls(this) 6248 6249 # bigquery select from window x AS (partition by ...) 6250 if alias: 6251 over = None 6252 self._match(TokenType.ALIAS) 6253 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6254 return this 6255 else: 6256 over = self._prev.text.upper() 6257 6258 if comments and isinstance(func, exp.Expression): 6259 func.pop_comments() 6260 6261 if not self._match(TokenType.L_PAREN): 6262 return self.expression( 6263 exp.Window, 6264 comments=comments, 6265 this=this, 6266 alias=self._parse_id_var(False), 6267 over=over, 6268 ) 6269 6270 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6271 6272 first = self._match(TokenType.FIRST) 6273 if self._match_text_seq("LAST"): 6274 first = False 6275 6276 partition, order = self._parse_partition_and_order() 6277 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6278 6279 if kind: 6280 self._match(TokenType.BETWEEN) 6281 start = self._parse_window_spec() 6282 self._match(TokenType.AND) 6283 end = self._parse_window_spec() 6284 6285 spec = self.expression( 6286 exp.WindowSpec, 6287 kind=kind, 6288 start=start["value"], 6289 start_side=start["side"], 6290 end=end["value"], 6291 end_side=end["side"], 6292 ) 6293 else: 6294 spec = None 6295 6296 self._match_r_paren() 6297 6298 window = self.expression( 6299 exp.Window, 6300 comments=comments, 6301 this=this, 6302 partition_by=partition, 6303 order=order, 6304 spec=spec, 6305 alias=window_alias, 6306 over=over, 6307 first=first, 6308 ) 6309 6310 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6311 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6312 return self._parse_window(window, alias=alias) 6313 6314 return window 6315 6316 def _parse_partition_and_order( 6317 self, 6318 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6319 return self._parse_partition_by(), self._parse_order() 6320 6321 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6322 self._match(TokenType.BETWEEN) 6323 6324 return { 6325 "value": ( 6326 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6327 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6328 or self._parse_bitwise() 6329 ), 6330 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6331 } 6332 6333 def _parse_alias( 6334 self, this: t.Optional[exp.Expression], explicit: bool = False 6335 ) -> t.Optional[exp.Expression]: 6336 any_token = self._match(TokenType.ALIAS) 6337 comments = self._prev_comments or [] 6338 6339 if explicit and not any_token: 6340 return this 6341 6342 if self._match(TokenType.L_PAREN): 6343 aliases = self.expression( 6344 exp.Aliases, 6345 comments=comments, 6346 this=this, 6347 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6348 ) 6349 self._match_r_paren(aliases) 6350 return aliases 6351 6352 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6353 self.STRING_ALIASES and self._parse_string_as_identifier() 6354 ) 6355 6356 if alias: 6357 comments.extend(alias.pop_comments()) 6358 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6359 column = this.this 6360 6361 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6362 if not this.comments and column and column.comments: 6363 this.comments = column.pop_comments() 6364 6365 return this 6366 6367 def _parse_id_var( 6368 self, 6369 any_token: bool = True, 6370 tokens: t.Optional[t.Collection[TokenType]] = None, 6371 ) -> t.Optional[exp.Expression]: 6372 expression = self._parse_identifier() 6373 if not expression and ( 6374 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6375 ): 6376 quoted = self._prev.token_type == TokenType.STRING 6377 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6378 6379 return expression 6380 6381 def _parse_string(self) -> t.Optional[exp.Expression]: 6382 if self._match_set(self.STRING_PARSERS): 6383 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6384 return self._parse_placeholder() 6385 6386 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6387 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6388 6389 def _parse_number(self) -> t.Optional[exp.Expression]: 6390 if self._match_set(self.NUMERIC_PARSERS): 6391 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6392 return self._parse_placeholder() 6393 6394 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6395 if self._match(TokenType.IDENTIFIER): 6396 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6397 return self._parse_placeholder() 6398 6399 def _parse_var( 6400 self, 6401 any_token: bool = False, 6402 tokens: t.Optional[t.Collection[TokenType]] = None, 6403 upper: bool = False, 6404 ) -> t.Optional[exp.Expression]: 6405 if ( 6406 (any_token and self._advance_any()) 6407 or self._match(TokenType.VAR) 6408 or (self._match_set(tokens) if tokens else False) 6409 ): 6410 return self.expression( 6411 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6412 ) 6413 return self._parse_placeholder() 6414 6415 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6416 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6417 self._advance() 6418 return self._prev 6419 return None 6420 6421 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6422 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6423 6424 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6425 return self._parse_primary() or self._parse_var(any_token=True) 6426 6427 def _parse_null(self) -> t.Optional[exp.Expression]: 6428 if self._match_set(self.NULL_TOKENS): 6429 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6430 return self._parse_placeholder() 6431 6432 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6433 if self._match(TokenType.TRUE): 6434 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6435 if self._match(TokenType.FALSE): 6436 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6437 return self._parse_placeholder() 6438 6439 def _parse_star(self) -> t.Optional[exp.Expression]: 6440 if self._match(TokenType.STAR): 6441 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6442 return self._parse_placeholder() 6443 6444 def _parse_parameter(self) -> exp.Parameter: 6445 this = self._parse_identifier() or self._parse_primary_or_var() 6446 return self.expression(exp.Parameter, this=this) 6447 6448 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6449 if self._match_set(self.PLACEHOLDER_PARSERS): 6450 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6451 if placeholder: 6452 return placeholder 6453 self._advance(-1) 6454 return None 6455 6456 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6457 if not self._match_texts(keywords): 6458 return None 6459 if self._match(TokenType.L_PAREN, advance=False): 6460 return self._parse_wrapped_csv(self._parse_expression) 6461 6462 expression = self._parse_expression() 6463 return [expression] if expression else None 6464 6465 def _parse_csv( 6466 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6467 ) -> t.List[exp.Expression]: 6468 parse_result = parse_method() 6469 items = [parse_result] if parse_result is not None else [] 6470 6471 while self._match(sep): 6472 self._add_comments(parse_result) 6473 parse_result = parse_method() 6474 if parse_result is not None: 6475 items.append(parse_result) 6476 6477 return items 6478 6479 def _parse_tokens( 6480 self, parse_method: t.Callable, expressions: t.Dict 6481 ) -> t.Optional[exp.Expression]: 6482 this = parse_method() 6483 6484 while self._match_set(expressions): 6485 this = self.expression( 6486 expressions[self._prev.token_type], 6487 this=this, 6488 comments=self._prev_comments, 6489 expression=parse_method(), 6490 ) 6491 6492 return this 6493 6494 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6495 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6496 6497 def _parse_wrapped_csv( 6498 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6499 ) -> t.List[exp.Expression]: 6500 return self._parse_wrapped( 6501 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6502 ) 6503 6504 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6505 wrapped = self._match(TokenType.L_PAREN) 6506 if not wrapped and not optional: 6507 self.raise_error("Expecting (") 6508 parse_result = parse_method() 6509 if wrapped: 6510 self._match_r_paren() 6511 return parse_result 6512 6513 def _parse_expressions(self) -> t.List[exp.Expression]: 6514 return self._parse_csv(self._parse_expression) 6515 6516 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6517 return self._parse_select() or self._parse_set_operations( 6518 self._parse_expression() if alias else self._parse_assignment() 6519 ) 6520 6521 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6522 return self._parse_query_modifiers( 6523 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6524 ) 6525 6526 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6527 this = None 6528 if self._match_texts(self.TRANSACTION_KIND): 6529 this = self._prev.text 6530 6531 self._match_texts(("TRANSACTION", "WORK")) 6532 6533 modes = [] 6534 while True: 6535 mode = [] 6536 while self._match(TokenType.VAR): 6537 mode.append(self._prev.text) 6538 6539 if mode: 6540 modes.append(" ".join(mode)) 6541 if not self._match(TokenType.COMMA): 6542 break 6543 6544 return self.expression(exp.Transaction, this=this, modes=modes) 6545 6546 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6547 chain = None 6548 savepoint = None 6549 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6550 6551 self._match_texts(("TRANSACTION", "WORK")) 6552 6553 if self._match_text_seq("TO"): 6554 self._match_text_seq("SAVEPOINT") 6555 savepoint = self._parse_id_var() 6556 6557 if self._match(TokenType.AND): 6558 chain = not self._match_text_seq("NO") 6559 self._match_text_seq("CHAIN") 6560 6561 if is_rollback: 6562 return self.expression(exp.Rollback, savepoint=savepoint) 6563 6564 return self.expression(exp.Commit, chain=chain) 6565 6566 def _parse_refresh(self) -> exp.Refresh: 6567 self._match(TokenType.TABLE) 6568 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6569 6570 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6571 if not self._match_text_seq("ADD"): 6572 return None 6573 6574 self._match(TokenType.COLUMN) 6575 exists_column = self._parse_exists(not_=True) 6576 expression = self._parse_field_def() 6577 6578 if expression: 6579 expression.set("exists", exists_column) 6580 6581 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6582 if self._match_texts(("FIRST", "AFTER")): 6583 position = self._prev.text 6584 column_position = self.expression( 6585 exp.ColumnPosition, this=self._parse_column(), position=position 6586 ) 6587 expression.set("position", column_position) 6588 6589 return expression 6590 6591 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6592 drop = self._match(TokenType.DROP) and self._parse_drop() 6593 if drop and not isinstance(drop, exp.Command): 6594 drop.set("kind", drop.args.get("kind", "COLUMN")) 6595 return drop 6596 6597 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6598 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6599 return self.expression( 6600 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6601 ) 6602 6603 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6604 index = self._index - 1 6605 6606 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6607 return self._parse_csv( 6608 lambda: self.expression( 6609 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6610 ) 6611 ) 6612 6613 self._retreat(index) 6614 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6615 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6616 6617 if self._match_text_seq("ADD", "COLUMNS"): 6618 schema = self._parse_schema() 6619 if schema: 6620 return [schema] 6621 return [] 6622 6623 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6624 6625 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6626 if self._match_texts(self.ALTER_ALTER_PARSERS): 6627 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6628 6629 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6630 # keyword after ALTER we default to parsing this statement 6631 self._match(TokenType.COLUMN) 6632 column = self._parse_field(any_token=True) 6633 6634 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6635 return self.expression(exp.AlterColumn, this=column, drop=True) 6636 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6637 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6638 if self._match(TokenType.COMMENT): 6639 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6640 if self._match_text_seq("DROP", "NOT", "NULL"): 6641 return self.expression( 6642 exp.AlterColumn, 6643 this=column, 6644 drop=True, 6645 allow_null=True, 6646 ) 6647 if self._match_text_seq("SET", "NOT", "NULL"): 6648 return self.expression( 6649 exp.AlterColumn, 6650 this=column, 6651 allow_null=False, 6652 ) 6653 self._match_text_seq("SET", "DATA") 6654 self._match_text_seq("TYPE") 6655 return self.expression( 6656 exp.AlterColumn, 6657 this=column, 6658 dtype=self._parse_types(), 6659 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6660 using=self._match(TokenType.USING) and self._parse_assignment(), 6661 ) 6662 6663 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6664 if self._match_texts(("ALL", "EVEN", "AUTO")): 6665 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6666 6667 self._match_text_seq("KEY", "DISTKEY") 6668 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6669 6670 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6671 if compound: 6672 self._match_text_seq("SORTKEY") 6673 6674 if self._match(TokenType.L_PAREN, advance=False): 6675 return self.expression( 6676 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6677 ) 6678 6679 self._match_texts(("AUTO", "NONE")) 6680 return self.expression( 6681 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6682 ) 6683 6684 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6685 index = self._index - 1 6686 6687 partition_exists = self._parse_exists() 6688 if self._match(TokenType.PARTITION, advance=False): 6689 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6690 6691 self._retreat(index) 6692 return self._parse_csv(self._parse_drop_column) 6693 6694 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6695 if self._match(TokenType.COLUMN): 6696 exists = self._parse_exists() 6697 old_column = self._parse_column() 6698 to = self._match_text_seq("TO") 6699 new_column = self._parse_column() 6700 6701 if old_column is None or to is None or new_column is None: 6702 return None 6703 6704 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6705 6706 self._match_text_seq("TO") 6707 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6708 6709 def _parse_alter_table_set(self) -> exp.AlterSet: 6710 alter_set = self.expression(exp.AlterSet) 6711 6712 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6713 "TABLE", "PROPERTIES" 6714 ): 6715 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6716 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6717 alter_set.set("expressions", [self._parse_assignment()]) 6718 elif self._match_texts(("LOGGED", "UNLOGGED")): 6719 alter_set.set("option", exp.var(self._prev.text.upper())) 6720 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6721 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6722 elif self._match_text_seq("LOCATION"): 6723 alter_set.set("location", self._parse_field()) 6724 elif self._match_text_seq("ACCESS", "METHOD"): 6725 alter_set.set("access_method", self._parse_field()) 6726 elif self._match_text_seq("TABLESPACE"): 6727 alter_set.set("tablespace", self._parse_field()) 6728 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6729 alter_set.set("file_format", [self._parse_field()]) 6730 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6731 alter_set.set("file_format", self._parse_wrapped_options()) 6732 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6733 alter_set.set("copy_options", self._parse_wrapped_options()) 6734 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6735 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6736 else: 6737 if self._match_text_seq("SERDE"): 6738 alter_set.set("serde", self._parse_field()) 6739 6740 alter_set.set("expressions", [self._parse_properties()]) 6741 6742 return alter_set 6743 6744 def _parse_alter(self) -> exp.Alter | exp.Command: 6745 start = self._prev 6746 6747 alter_token = self._match_set(self.ALTERABLES) and self._prev 6748 if not alter_token: 6749 return self._parse_as_command(start) 6750 6751 exists = self._parse_exists() 6752 only = self._match_text_seq("ONLY") 6753 this = self._parse_table(schema=True) 6754 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6755 6756 if self._next: 6757 self._advance() 6758 6759 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6760 if parser: 6761 actions = ensure_list(parser(self)) 6762 options = self._parse_csv(self._parse_property) 6763 6764 if not self._curr and actions: 6765 return self.expression( 6766 exp.Alter, 6767 this=this, 6768 kind=alter_token.text.upper(), 6769 exists=exists, 6770 actions=actions, 6771 only=only, 6772 options=options, 6773 cluster=cluster, 6774 ) 6775 6776 return self._parse_as_command(start) 6777 6778 def _parse_merge(self) -> exp.Merge: 6779 self._match(TokenType.INTO) 6780 target = self._parse_table() 6781 6782 if target and self._match(TokenType.ALIAS, advance=False): 6783 target.set("alias", self._parse_table_alias()) 6784 6785 self._match(TokenType.USING) 6786 using = self._parse_table() 6787 6788 self._match(TokenType.ON) 6789 on = self._parse_assignment() 6790 6791 return self.expression( 6792 exp.Merge, 6793 this=target, 6794 using=using, 6795 on=on, 6796 expressions=self._parse_when_matched(), 6797 ) 6798 6799 def _parse_when_matched(self) -> t.List[exp.When]: 6800 whens = [] 6801 6802 while self._match(TokenType.WHEN): 6803 matched = not self._match(TokenType.NOT) 6804 self._match_text_seq("MATCHED") 6805 source = ( 6806 False 6807 if self._match_text_seq("BY", "TARGET") 6808 else self._match_text_seq("BY", "SOURCE") 6809 ) 6810 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6811 6812 self._match(TokenType.THEN) 6813 6814 if self._match(TokenType.INSERT): 6815 _this = self._parse_star() 6816 if _this: 6817 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6818 else: 6819 then = self.expression( 6820 exp.Insert, 6821 this=self._parse_value(), 6822 expression=self._match_text_seq("VALUES") and self._parse_value(), 6823 ) 6824 elif self._match(TokenType.UPDATE): 6825 expressions = self._parse_star() 6826 if expressions: 6827 then = self.expression(exp.Update, expressions=expressions) 6828 else: 6829 then = self.expression( 6830 exp.Update, 6831 expressions=self._match(TokenType.SET) 6832 and self._parse_csv(self._parse_equality), 6833 ) 6834 elif self._match(TokenType.DELETE): 6835 then = self.expression(exp.Var, this=self._prev.text) 6836 else: 6837 then = None 6838 6839 whens.append( 6840 self.expression( 6841 exp.When, 6842 matched=matched, 6843 source=source, 6844 condition=condition, 6845 then=then, 6846 ) 6847 ) 6848 return whens 6849 6850 def _parse_show(self) -> t.Optional[exp.Expression]: 6851 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6852 if parser: 6853 return parser(self) 6854 return self._parse_as_command(self._prev) 6855 6856 def _parse_set_item_assignment( 6857 self, kind: t.Optional[str] = None 6858 ) -> t.Optional[exp.Expression]: 6859 index = self._index 6860 6861 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6862 return self._parse_set_transaction(global_=kind == "GLOBAL") 6863 6864 left = self._parse_primary() or self._parse_column() 6865 assignment_delimiter = self._match_texts(("=", "TO")) 6866 6867 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6868 self._retreat(index) 6869 return None 6870 6871 right = self._parse_statement() or self._parse_id_var() 6872 if isinstance(right, (exp.Column, exp.Identifier)): 6873 right = exp.var(right.name) 6874 6875 this = self.expression(exp.EQ, this=left, expression=right) 6876 return self.expression(exp.SetItem, this=this, kind=kind) 6877 6878 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6879 self._match_text_seq("TRANSACTION") 6880 characteristics = self._parse_csv( 6881 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6882 ) 6883 return self.expression( 6884 exp.SetItem, 6885 expressions=characteristics, 6886 kind="TRANSACTION", 6887 **{"global": global_}, # type: ignore 6888 ) 6889 6890 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6891 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6892 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6893 6894 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6895 index = self._index 6896 set_ = self.expression( 6897 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6898 ) 6899 6900 if self._curr: 6901 self._retreat(index) 6902 return self._parse_as_command(self._prev) 6903 6904 return set_ 6905 6906 def _parse_var_from_options( 6907 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6908 ) -> t.Optional[exp.Var]: 6909 start = self._curr 6910 if not start: 6911 return None 6912 6913 option = start.text.upper() 6914 continuations = options.get(option) 6915 6916 index = self._index 6917 self._advance() 6918 for keywords in continuations or []: 6919 if isinstance(keywords, str): 6920 keywords = (keywords,) 6921 6922 if self._match_text_seq(*keywords): 6923 option = f"{option} {' '.join(keywords)}" 6924 break 6925 else: 6926 if continuations or continuations is None: 6927 if raise_unmatched: 6928 self.raise_error(f"Unknown option {option}") 6929 6930 self._retreat(index) 6931 return None 6932 6933 return exp.var(option) 6934 6935 def _parse_as_command(self, start: Token) -> exp.Command: 6936 while self._curr: 6937 self._advance() 6938 text = self._find_sql(start, self._prev) 6939 size = len(start.text) 6940 self._warn_unsupported() 6941 return exp.Command(this=text[:size], expression=text[size:]) 6942 6943 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6944 settings = [] 6945 6946 self._match_l_paren() 6947 kind = self._parse_id_var() 6948 6949 if self._match(TokenType.L_PAREN): 6950 while True: 6951 key = self._parse_id_var() 6952 value = self._parse_primary() 6953 6954 if not key and value is None: 6955 break 6956 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6957 self._match(TokenType.R_PAREN) 6958 6959 self._match_r_paren() 6960 6961 return self.expression( 6962 exp.DictProperty, 6963 this=this, 6964 kind=kind.this if kind else None, 6965 settings=settings, 6966 ) 6967 6968 def _parse_dict_range(self, this: str) -> exp.DictRange: 6969 self._match_l_paren() 6970 has_min = self._match_text_seq("MIN") 6971 if has_min: 6972 min = self._parse_var() or self._parse_primary() 6973 self._match_text_seq("MAX") 6974 max = self._parse_var() or self._parse_primary() 6975 else: 6976 max = self._parse_var() or self._parse_primary() 6977 min = exp.Literal.number(0) 6978 self._match_r_paren() 6979 return self.expression(exp.DictRange, this=this, min=min, max=max) 6980 6981 def _parse_comprehension( 6982 self, this: t.Optional[exp.Expression] 6983 ) -> t.Optional[exp.Comprehension]: 6984 index = self._index 6985 expression = self._parse_column() 6986 if not self._match(TokenType.IN): 6987 self._retreat(index - 1) 6988 return None 6989 iterator = self._parse_column() 6990 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6991 return self.expression( 6992 exp.Comprehension, 6993 this=this, 6994 expression=expression, 6995 iterator=iterator, 6996 condition=condition, 6997 ) 6998 6999 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7000 if self._match(TokenType.HEREDOC_STRING): 7001 return self.expression(exp.Heredoc, this=self._prev.text) 7002 7003 if not self._match_text_seq("$"): 7004 return None 7005 7006 tags = ["$"] 7007 tag_text = None 7008 7009 if self._is_connected(): 7010 self._advance() 7011 tags.append(self._prev.text.upper()) 7012 else: 7013 self.raise_error("No closing $ found") 7014 7015 if tags[-1] != "$": 7016 if self._is_connected() and self._match_text_seq("$"): 7017 tag_text = tags[-1] 7018 tags.append("$") 7019 else: 7020 self.raise_error("No closing $ found") 7021 7022 heredoc_start = self._curr 7023 7024 while self._curr: 7025 if self._match_text_seq(*tags, advance=False): 7026 this = self._find_sql(heredoc_start, self._prev) 7027 self._advance(len(tags)) 7028 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7029 7030 self._advance() 7031 7032 self.raise_error(f"No closing {''.join(tags)} found") 7033 return None 7034 7035 def _find_parser( 7036 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7037 ) -> t.Optional[t.Callable]: 7038 if not self._curr: 7039 return None 7040 7041 index = self._index 7042 this = [] 7043 while True: 7044 # The current token might be multiple words 7045 curr = self._curr.text.upper() 7046 key = curr.split(" ") 7047 this.append(curr) 7048 7049 self._advance() 7050 result, trie = in_trie(trie, key) 7051 if result == TrieResult.FAILED: 7052 break 7053 7054 if result == TrieResult.EXISTS: 7055 subparser = parsers[" ".join(this)] 7056 return subparser 7057 7058 self._retreat(index) 7059 return None 7060 7061 def _match(self, token_type, advance=True, expression=None): 7062 if not self._curr: 7063 return None 7064 7065 if self._curr.token_type == token_type: 7066 if advance: 7067 self._advance() 7068 self._add_comments(expression) 7069 return True 7070 7071 return None 7072 7073 def _match_set(self, types, advance=True): 7074 if not self._curr: 7075 return None 7076 7077 if self._curr.token_type in types: 7078 if advance: 7079 self._advance() 7080 return True 7081 7082 return None 7083 7084 def _match_pair(self, token_type_a, token_type_b, advance=True): 7085 if not self._curr or not self._next: 7086 return None 7087 7088 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7089 if advance: 7090 self._advance(2) 7091 return True 7092 7093 return None 7094 7095 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7096 if not self._match(TokenType.L_PAREN, expression=expression): 7097 self.raise_error("Expecting (") 7098 7099 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7100 if not self._match(TokenType.R_PAREN, expression=expression): 7101 self.raise_error("Expecting )") 7102 7103 def _match_texts(self, texts, advance=True): 7104 if ( 7105 self._curr 7106 and self._curr.token_type != TokenType.STRING 7107 and self._curr.text.upper() in texts 7108 ): 7109 if advance: 7110 self._advance() 7111 return True 7112 return None 7113 7114 def _match_text_seq(self, *texts, advance=True): 7115 index = self._index 7116 for text in texts: 7117 if ( 7118 self._curr 7119 and self._curr.token_type != TokenType.STRING 7120 and self._curr.text.upper() == text 7121 ): 7122 self._advance() 7123 else: 7124 self._retreat(index) 7125 return None 7126 7127 if not advance: 7128 self._retreat(index) 7129 7130 return True 7131 7132 def _replace_lambda( 7133 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7134 ) -> t.Optional[exp.Expression]: 7135 if not node: 7136 return node 7137 7138 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7139 7140 for column in node.find_all(exp.Column): 7141 typ = lambda_types.get(column.parts[0].name) 7142 if typ is not None: 7143 dot_or_id = column.to_dot() if column.table else column.this 7144 7145 if typ: 7146 dot_or_id = self.expression( 7147 exp.Cast, 7148 this=dot_or_id, 7149 to=typ, 7150 ) 7151 7152 parent = column.parent 7153 7154 while isinstance(parent, exp.Dot): 7155 if not isinstance(parent.parent, exp.Dot): 7156 parent.replace(dot_or_id) 7157 break 7158 parent = parent.parent 7159 else: 7160 if column is node: 7161 node = dot_or_id 7162 else: 7163 column.replace(dot_or_id) 7164 return node 7165 7166 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7167 start = self._prev 7168 7169 # Not to be confused with TRUNCATE(number, decimals) function call 7170 if self._match(TokenType.L_PAREN): 7171 self._retreat(self._index - 2) 7172 return self._parse_function() 7173 7174 # Clickhouse supports TRUNCATE DATABASE as well 7175 is_database = self._match(TokenType.DATABASE) 7176 7177 self._match(TokenType.TABLE) 7178 7179 exists = self._parse_exists(not_=False) 7180 7181 expressions = self._parse_csv( 7182 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7183 ) 7184 7185 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7186 7187 if self._match_text_seq("RESTART", "IDENTITY"): 7188 identity = "RESTART" 7189 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7190 identity = "CONTINUE" 7191 else: 7192 identity = None 7193 7194 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7195 option = self._prev.text 7196 else: 7197 option = None 7198 7199 partition = self._parse_partition() 7200 7201 # Fallback case 7202 if self._curr: 7203 return self._parse_as_command(start) 7204 7205 return self.expression( 7206 exp.TruncateTable, 7207 expressions=expressions, 7208 is_database=is_database, 7209 exists=exists, 7210 cluster=cluster, 7211 identity=identity, 7212 option=option, 7213 partition=partition, 7214 ) 7215 7216 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7217 this = self._parse_ordered(self._parse_opclass) 7218 7219 if not self._match(TokenType.WITH): 7220 return this 7221 7222 op = self._parse_var(any_token=True) 7223 7224 return self.expression(exp.WithOperator, this=this, op=op) 7225 7226 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7227 self._match(TokenType.EQ) 7228 self._match(TokenType.L_PAREN) 7229 7230 opts: t.List[t.Optional[exp.Expression]] = [] 7231 while self._curr and not self._match(TokenType.R_PAREN): 7232 if self._match_text_seq("FORMAT_NAME", "="): 7233 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7234 # so we parse it separately to use _parse_field() 7235 prop = self.expression( 7236 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7237 ) 7238 opts.append(prop) 7239 else: 7240 opts.append(self._parse_property()) 7241 7242 self._match(TokenType.COMMA) 7243 7244 return opts 7245 7246 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7247 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7248 7249 options = [] 7250 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7251 option = self._parse_var(any_token=True) 7252 prev = self._prev.text.upper() 7253 7254 # Different dialects might separate options and values by white space, "=" and "AS" 7255 self._match(TokenType.EQ) 7256 self._match(TokenType.ALIAS) 7257 7258 param = self.expression(exp.CopyParameter, this=option) 7259 7260 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7261 TokenType.L_PAREN, advance=False 7262 ): 7263 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7264 param.set("expressions", self._parse_wrapped_options()) 7265 elif prev == "FILE_FORMAT": 7266 # T-SQL's external file format case 7267 param.set("expression", self._parse_field()) 7268 else: 7269 param.set("expression", self._parse_unquoted_field()) 7270 7271 options.append(param) 7272 self._match(sep) 7273 7274 return options 7275 7276 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7277 expr = self.expression(exp.Credentials) 7278 7279 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7280 expr.set("storage", self._parse_field()) 7281 if self._match_text_seq("CREDENTIALS"): 7282 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7283 creds = ( 7284 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7285 ) 7286 expr.set("credentials", creds) 7287 if self._match_text_seq("ENCRYPTION"): 7288 expr.set("encryption", self._parse_wrapped_options()) 7289 if self._match_text_seq("IAM_ROLE"): 7290 expr.set("iam_role", self._parse_field()) 7291 if self._match_text_seq("REGION"): 7292 expr.set("region", self._parse_field()) 7293 7294 return expr 7295 7296 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7297 return self._parse_field() 7298 7299 def _parse_copy(self) -> exp.Copy | exp.Command: 7300 start = self._prev 7301 7302 self._match(TokenType.INTO) 7303 7304 this = ( 7305 self._parse_select(nested=True, parse_subquery_alias=False) 7306 if self._match(TokenType.L_PAREN, advance=False) 7307 else self._parse_table(schema=True) 7308 ) 7309 7310 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7311 7312 files = self._parse_csv(self._parse_file_location) 7313 credentials = self._parse_credentials() 7314 7315 self._match_text_seq("WITH") 7316 7317 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7318 7319 # Fallback case 7320 if self._curr: 7321 return self._parse_as_command(start) 7322 7323 return self.expression( 7324 exp.Copy, 7325 this=this, 7326 kind=kind, 7327 credentials=credentials, 7328 files=files, 7329 params=params, 7330 ) 7331 7332 def _parse_normalize(self) -> exp.Normalize: 7333 return self.expression( 7334 exp.Normalize, 7335 this=self._parse_bitwise(), 7336 form=self._match(TokenType.COMMA) and self._parse_var(), 7337 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1354 def __init__( 1355 self, 1356 error_level: t.Optional[ErrorLevel] = None, 1357 error_message_context: int = 100, 1358 max_errors: int = 3, 1359 dialect: DialectType = None, 1360 ): 1361 from sqlglot.dialects import Dialect 1362 1363 self.error_level = error_level or ErrorLevel.IMMEDIATE 1364 self.error_message_context = error_message_context 1365 self.max_errors = max_errors 1366 self.dialect = Dialect.get_or_raise(dialect) 1367 self.reset()
1379 def parse( 1380 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1381 ) -> t.List[t.Optional[exp.Expression]]: 1382 """ 1383 Parses a list of tokens and returns a list of syntax trees, one tree 1384 per parsed SQL statement. 1385 1386 Args: 1387 raw_tokens: The list of tokens. 1388 sql: The original SQL string, used to produce helpful debug messages. 1389 1390 Returns: 1391 The list of the produced syntax trees. 1392 """ 1393 return self._parse( 1394 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1395 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1397 def parse_into( 1398 self, 1399 expression_types: exp.IntoType, 1400 raw_tokens: t.List[Token], 1401 sql: t.Optional[str] = None, 1402 ) -> t.List[t.Optional[exp.Expression]]: 1403 """ 1404 Parses a list of tokens into a given Expression type. If a collection of Expression 1405 types is given instead, this method will try to parse the token list into each one 1406 of them, stopping at the first for which the parsing succeeds. 1407 1408 Args: 1409 expression_types: The expression type(s) to try and parse the token list into. 1410 raw_tokens: The list of tokens. 1411 sql: The original SQL string, used to produce helpful debug messages. 1412 1413 Returns: 1414 The target Expression. 1415 """ 1416 errors = [] 1417 for expression_type in ensure_list(expression_types): 1418 parser = self.EXPRESSION_PARSERS.get(expression_type) 1419 if not parser: 1420 raise TypeError(f"No parser registered for {expression_type}") 1421 1422 try: 1423 return self._parse(parser, raw_tokens, sql) 1424 except ParseError as e: 1425 e.errors[0]["into_expression"] = expression_type 1426 errors.append(e) 1427 1428 raise ParseError( 1429 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1430 errors=merge_errors(errors), 1431 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1471 def check_errors(self) -> None: 1472 """Logs or raises any found errors, depending on the chosen error level setting.""" 1473 if self.error_level == ErrorLevel.WARN: 1474 for error in self.errors: 1475 logger.error(str(error)) 1476 elif self.error_level == ErrorLevel.RAISE and self.errors: 1477 raise ParseError( 1478 concat_messages(self.errors, self.max_errors), 1479 errors=merge_errors(self.errors), 1480 )
Logs or raises any found errors, depending on the chosen error level setting.
1482 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1483 """ 1484 Appends an error in the list of recorded errors or raises it, depending on the chosen 1485 error level setting. 1486 """ 1487 token = token or self._curr or self._prev or Token.string("") 1488 start = token.start 1489 end = token.end + 1 1490 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1491 highlight = self.sql[start:end] 1492 end_context = self.sql[end : end + self.error_message_context] 1493 1494 error = ParseError.new( 1495 f"{message}. Line {token.line}, Col: {token.col}.\n" 1496 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1497 description=message, 1498 line=token.line, 1499 col=token.col, 1500 start_context=start_context, 1501 highlight=highlight, 1502 end_context=end_context, 1503 ) 1504 1505 if self.error_level == ErrorLevel.IMMEDIATE: 1506 raise error 1507 1508 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1510 def expression( 1511 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1512 ) -> E: 1513 """ 1514 Creates a new, validated Expression. 1515 1516 Args: 1517 exp_class: The expression class to instantiate. 1518 comments: An optional list of comments to attach to the expression. 1519 kwargs: The arguments to set for the expression along with their respective values. 1520 1521 Returns: 1522 The target expression. 1523 """ 1524 instance = exp_class(**kwargs) 1525 instance.add_comments(comments) if comments else self._add_comments(instance) 1526 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1533 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1534 """ 1535 Validates an Expression, making sure that all its mandatory arguments are set. 1536 1537 Args: 1538 expression: The expression to validate. 1539 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1540 1541 Returns: 1542 The validated expression. 1543 """ 1544 if self.error_level != ErrorLevel.IGNORE: 1545 for error_message in expression.error_messages(args): 1546 self.raise_error(error_message) 1547 1548 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.